From 1f854d45a4b9b550ba939450378bf670bc064852 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jord=C3=A3o=20Bragantini?= Date: Fri, 25 Aug 2023 18:58:17 -0700 Subject: [PATCH] Add export to trackmate format (#45) --- NOTES | 35 +++ pyproject.toml | 1 + tox.ini | 1 + ultrack/__init__.py | 1 + ultrack/analysis/utils.py | 1 + ultrack/cli/_test/test_cli.py | 14 +- ultrack/cli/export.py | 25 ++- ultrack/core/export/__init__.py | 1 + ultrack/core/export/_test/test_trackmate.py | 37 ++++ ultrack/core/export/trackmate.py | 226 ++++++++++++++++++++ 10 files changed, 340 insertions(+), 2 deletions(-) create mode 100644 NOTES create mode 100644 ultrack/core/export/_test/test_trackmate.py create mode 100644 ultrack/core/export/trackmate.py diff --git a/NOTES b/NOTES new file mode 100644 index 0000000..07702aa --- /dev/null +++ b/NOTES @@ -0,0 +1,35 @@ +# REFACTOR: + - merge multiple solver classes methods into constructor to make a single deeper function + +# TODO: + - IMPORTANT: free nodes after each hierarchy insertion to database and measure performance. + - IMPORTANT: refactor widget to use a tab per option (segment, link, track) + - add warning when DB already contains data. + - create homepage/docs. 
+ - document config + - document database + - option to lock segmentations from hypothesis viz widget + +# NOTES: + - heuristic optimizer: + - add monte-carlo (probabilistic) moves into local search function + - local search by removing a node and trying to add its overlapping nodes + + - slurm: + - segment: 1hr 30min, 790 jobs + - link: 30min, 789 jobs + - track 1st: 2hr 30min, 8 jobs of 100 time points + overlap + - track 2nd: 2hr, 8 jobs of 100 time points + overlap + - export: 20min + - total: about 7 hours + - NOTES: + - fix window overlap sql solution update by adding parents only when it already exists on solution + - segment step memory usage must be improved, with Daxi dataset it's using about 100GB per frame + - tracks should be divided into smaller chunks + - reduce tracking model build time with vector variables + +# ARTICLE: + - experiments comparing tracking results from ensemble of connected components, watershed, cellpose, stardist vs their accuracy alone; + - experiments comparing results from binary contour (their own algorithm post processing) vs contour obtained from network output directly; + - cell-tracking challenge benchmark; + - qualitative results our dataset and jan funke datasets; diff --git a/pyproject.toml b/pyproject.toml index 977ce35..8836d16 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -36,6 +36,7 @@ pre-commit = "^3.2.2" pytest-qt = "^4.2.0" asv = "^0.5.1" "testing.postgresql" = "^1.3.0" +pytrackmate = {git = "https://github.com/hadim/pytrackmate.git"} [tool.poetry.extras] flow = [ diff --git a/tox.ini b/tox.ini index e492f16..12998aa 100644 --- a/tox.ini +++ b/tox.ini @@ -34,6 +34,7 @@ deps = pyqt5 poetry testing.postgresql + git+https://github.com/hadim/pytrackmate.git commands = pytest -v --color=yes --cov=ultrack --cov-report=xml --durations=15 --ignore=ultrack/widgets diff --git a/ultrack/__init__.py b/ultrack/__init__.py index 4f1b1df..7356d24 100644 --- a/ultrack/__init__.py +++ b/ultrack/__init__.py @@ -12,6 +12,7 @@ 
from ultrack.config.config import MainConfig, load_config from ultrack.core.export.ctc import to_ctc +from ultrack.core.export.trackmate import to_trackmate from ultrack.core.export.tracks_layer import to_tracks_layer from ultrack.core.export.zarr import tracks_to_zarr from ultrack.core.linking.processing import link diff --git a/ultrack/analysis/utils.py b/ultrack/analysis/utils.py index de8866b..4192865 100644 --- a/ultrack/analysis/utils.py +++ b/ultrack/analysis/utils.py @@ -156,6 +156,7 @@ def get_subgraph( compressed_df["track_id"].to_numpy(dtype=int), compressed_df["parent_track_id"].to_numpy(dtype=int), ) + roots = np.asarray(roots, dtype=int) subforest = [] for root in roots: diff --git a/ultrack/cli/_test/test_cli.py b/ultrack/cli/_test/test_cli.py index 2402fe0..913f9f7 100644 --- a/ultrack/cli/_test/test_cli.py +++ b/ultrack/cli/_test/test_cli.py @@ -15,7 +15,7 @@ def _run_command(command_and_args: List[str]) -> None: try: main(command_and_args) except SystemExit as exit: - assert exit.code == 0 + assert exit.code == 0, f"{command_and_args} failed with exit code {exit.code}" @pytest.mark.usefixtures("zarr_dataset_paths") @@ -105,6 +105,18 @@ def test_ctc_export(self, instance_config_path: str, tmp_path: Path) -> None: ] ) + def test_trackmate_export(self, instance_config_path: str, tmp_path: Path) -> None: + _run_command( + [ + "export", + "trackmate", + "-cfg", + instance_config_path, + "-o", + str(tmp_path / "tracks.xml"), + ] + ) + def test_zarr_napari_export( self, instance_config_path: str, diff --git a/ultrack/cli/export.py b/ultrack/cli/export.py index e74135b..4e2eeb3 100644 --- a/ultrack/cli/export.py +++ b/ultrack/cli/export.py @@ -14,7 +14,7 @@ tuple_callback, ) from ultrack.config import MainConfig -from ultrack.core.export import to_ctc, to_tracks_layer, tracks_to_zarr +from ultrack.core.export import to_ctc, to_trackmate, to_tracks_layer, tracks_to_zarr from ultrack.core.export.utils import maybe_overwrite_path from ultrack.imgproc.measure 
import tracks_properties @@ -162,10 +162,33 @@ def zarr_napari_cli( tracks_w_measures.to_csv(tracks_path, index=False) +@click.command("trackmate") +@click.option( + "--output-path", + "-o", + required=True, + type=click.Path(path_type=Path), + show_default=True, + help="TrackMate XML output path.", +) +@config_option() +@overwrite_option() +def trackmate_cli( + config: MainConfig, + output_path: Path, + overwrite: bool, +) -> None: + """ + Exports tracking results to TrackMate XML format. + """ + to_trackmate(config, output_path, overwrite) + + @click.group("export") def export_cli() -> None: """Exports tracking and segmentation results to selected format.""" export_cli.add_command(ctc_cli) +export_cli.add_command(trackmate_cli) export_cli.add_command(zarr_napari_cli) diff --git a/ultrack/core/export/__init__.py b/ultrack/core/export/__init__.py index 03221ce..4edccca 100644 --- a/ultrack/core/export/__init__.py +++ b/ultrack/core/export/__init__.py @@ -1,3 +1,4 @@ from ultrack.core.export.ctc import to_ctc +from ultrack.core.export.trackmate import to_trackmate from ultrack.core.export.tracks_layer import to_tracks_layer from ultrack.core.export.zarr import tracks_to_zarr diff --git a/ultrack/core/export/_test/test_trackmate.py b/ultrack/core/export/_test/test_trackmate.py new file mode 100644 index 0000000..cc01a19 --- /dev/null +++ b/ultrack/core/export/_test/test_trackmate.py @@ -0,0 +1,37 @@ +from pathlib import Path + +import numpy as np +import pandas as pd +from pytrackmate import trackmate_peak_import + +from ultrack.core.database import NO_PARENT +from ultrack.core.export.trackmate import tracks_layer_to_trackmate + + +def test_trackmate_writer(tmp_path: Path) -> None: + tracks_outpath = tmp_path / "tracks.xml" + + tracks_df = pd.DataFrame( + { + "id": [1, 2, 3, 4], + "parent_id": [NO_PARENT, 1, 2, 2], + "track_id": [1, 1, 2, 3], + "t": [0, 1, 2, 2], + "z": [0, 0, 0, 0], + "y": [10, 20, 30, 10], + "x": [1, 2, 3, 1], + } + ) + + xml_str = 
def _set_filter_elem(elem: ET.Element) -> None:
    """Set the `feature`, `value` and `isabove` attributes of a filter element.

    The filter written is `QUALITY` above `0.0`.
    """
    elem.set("feature", "QUALITY")
    elem.set("value", "0.0")
    elem.set("isabove", "true")


def _declare_features(parent: ET.Element, tag: str, features: list) -> None:
    """Append a `tag` element to `parent` with one `Feature` child per declaration.

    Each declaration is a `(feature, name, shortname, dimension, isint)` tuple of strings.
    """
    group_elem = ET.SubElement(parent, tag)
    for feature, name, shortname, dimension, isint in features:
        ET.SubElement(
            group_elem,
            "Feature",
            attrib={
                "feature": feature,
                "name": name,
                "shortname": shortname,
                "dimension": dimension,
                "isint": isint,
            },
        )


def tracks_layer_to_trackmate(
    tracks_df: pd.DataFrame,
) -> str:
    """
    Convert a pandas DataFrame representation of Napari track layer to TrackMate XML format.

    The input DataFrame is not modified. Spot IDs and edge targets are taken from the
    `id` column (not from the DataFrame index), so they always match the values
    referenced by `parent_id`.

    Parameters
    ----------
    tracks_df : pd.DataFrame
        A DataFrame with columns `track_id, id, parent_id, t, y, x` and, optionally, `z`.
        Cells that belong to the same track have the same `track_id`. Rows whose
        `parent_id` equals `NO_PARENT` produce no edge.

    Returns
    -------
    str
        A string representation of the XML in the TrackMate format.
    """
    # `astype` returns a new frame, the caller's DataFrame is left untouched.
    tracks_df = tracks_df.astype({"id": int, "parent_id": int, "track_id": int})

    has_z = "z" in tracks_df.columns
    # Frame count must be an integer string; an empty table has no frames.
    n_frames = 0 if tracks_df.empty else int(tracks_df["t"].max()) + 1

    # Create XML root and child elements
    root = ET.Element("TrackMate")
    root.set("version", "3.7.0")  # required by TrackMate, not significant

    model_elem = ET.SubElement(root, "Model")
    all_tracks_elem = ET.SubElement(model_elem, "AllTracks")
    filtered_tracks_elem = ET.SubElement(model_elem, "FilteredTracks")
    all_spots_elem = ET.SubElement(model_elem, "AllSpots")
    features_elem = ET.SubElement(model_elem, "FeatureDeclarations")

    settings_elem = ET.SubElement(root, "Settings")

    _set_filter_elem(ET.SubElement(settings_elem, "InitialSpotFilter"))
    ET.SubElement(settings_elem, "SpotFilterCollection")
    ET.SubElement(settings_elem, "TrackFilterCollection")

    image_elem = ET.SubElement(settings_elem, "ImageData")
    image_elem.set("filename", "None")
    image_elem.set("folder", "None")
    image_elem.set("width", "0")
    image_elem.set("height", "0")
    image_elem.set("depth", "0")
    image_elem.set("nslices", "1")
    image_elem.set("nframes", str(n_frames))
    image_elem.set("pixelwidth", "1.0")
    image_elem.set("pixelheight", "1.0")
    image_elem.set("voxeldepth", "1.0")
    image_elem.set("timeinterval", "1.0")

    # Feature declarations: (feature, name, shortname, dimension, isint)
    spot_features = [
        ("QUALITY", "Quality", "Quality", "QUALITY", "false"),
        ("POSITION_X", "X", "X", "POSITION", "false"),
        ("POSITION_Y", "Y", "Y", "POSITION", "false"),
        ("POSITION_Z", "Z", "Z", "POSITION", "false"),
        ("POSITION_T", "T", "T", "TIME", "false"),
        ("FRAME", "Frame", "Frame", "NONE", "true"),
        ("RADIUS", "Radius", "R", "LENGTH", "false"),
        ("VISIBILITY", "Visibility", "Visibility", "NONE", "true"),
        ("MANUAL_COLOR", "Manual spot color", "Spot color", "NONE", "true"),
        ("MEAN_INTENSITY", "Mean intensity", "Mean", "INTENSITY", "false"),
        ("MEDIAN_INTENSITY", "Median intensity", "Median", "INTENSITY", "false"),
        ("MIN_INTENSITY", "Minimal intensity", "Min", "INTENSITY", "false"),
        ("MAX_INTENSITY", "Maximal intensity", "Max", "INTENSITY", "false"),
        ("TOTAL_INTENSITY", "Total intensity", "Total int.", "INTENSITY", "false"),
        ("STANDARD_DEVIATION", "Standard deviation", "Stdev.", "INTENSITY", "false"),
        ("ESTIMATED_DIAMETER", "Estimated diameter", "Diam.", "LENGTH", "false"),
        ("CONTRAST", "Contrast", "Constrast", "NONE", "false"),
        ("SNR", "Signal/Noise ratio", "SNR", "NONE", "false"),
    ]
    edge_features = [
        ("SPOT_SOURCE_ID", "Source spot ID", "Source ID", "NONE", "true"),
        ("SPOT_TARGET_ID", "Target spot ID", "Target ID", "NONE", "true"),
        # ... add other edge features if needed
    ]
    track_features = [
        ("NUMBER_SPOTS", "Number of spots in track", "N spots", "NONE", "true"),
        ("NUMBER_GAPS", "Number of gaps", "Gaps", "NONE", "true"),
        ("LONGEST_GAP", "Longest gap", "Longest gap", "NONE", "true"),
        ("NUMBER_SPLITS", "Number of split events", "Splits", "NONE", "true"),
        ("NUMBER_MERGES", "Number of merge events", "Merges", "NONE", "true"),
        ("NUMBER_COMPLEX", "Complex points", "Complex", "NONE", "true"),
        ("TRACK_DURATION", "Duration of track", "Duration", "TIME", "false"),
        ("TRACK_START", "Track start", "T start", "TIME", "false"),
        ("TRACK_STOP", "Track stop", "T stop", "TIME", "false"),
        ("TRACK_DISPLACEMENT", "Track displacement", "Displacement", "LENGTH", "false"),
        ("TRACK_INDEX", "Track index", "Index", "NONE", "true"),
        ("TRACK_ID", "Track ID", "ID", "NONE", "true"),
    ]
    _declare_features(features_elem, "SpotFeatures", spot_features)
    _declare_features(features_elem, "EdgeFeatures", edge_features)
    _declare_features(features_elem, "TrackFeatures", track_features)

    # Create spots, grouped by time point
    for frame, group in tracks_df.groupby("t"):
        frame_str = str(int(frame))  # integer even when `t` is stored as float
        frame_elem = ET.SubElement(all_spots_elem, "SpotsInFrame")
        frame_elem.set("frame", frame_str)

        z_values = group["z"] if has_z else [0.0] * len(group)
        # Column-wise iteration keeps each column's own dtype (no row upcasting).
        for spot_id, x, y, z in zip(group["id"], group["x"], group["y"], z_values):
            spot_elem = ET.SubElement(frame_elem, "Spot")
            spot_elem.set("ID", str(spot_id))
            spot_elem.set("QUALITY", "1.0")
            spot_elem.set("VISIBILITY", "1")
            spot_elem.set("NAME", str(spot_id))
            spot_elem.set("FRAME", frame_str)
            spot_elem.set("RADIUS", "5.0")
            spot_elem.set("POSITION_X", str(float(x)))
            spot_elem.set("POSITION_Y", str(float(y)))
            spot_elem.set("POSITION_Z", str(float(z)))

    # Create tracks using lineage
    for track_id, group in tracks_df.groupby("track_id"):
        track_elem = ET.SubElement(all_tracks_elem, "Track")
        track_elem.set("TRACK_ID", str(track_id))
        track_elem.set("NUMBER_SPOTS", str(len(group)))
        track_elem.set("NUMBER_GAPS", "0")
        track_elem.set("TRACK_START", str(group["t"].min()))
        track_elem.set("TRACK_STOP", str(group["t"].max()))
        track_elem.set("name", f"Track_{track_id}")

        ET.SubElement(filtered_tracks_elem, "TrackID").set("TRACK_ID", str(track_id))

        for spot_id, parent_id, t in zip(group["id"], group["parent_id"], group["t"]):
            if parent_id == NO_PARENT:
                continue
            edge_elem = ET.SubElement(track_elem, "Edge")
            edge_elem.set("SPOT_SOURCE_ID", str(int(parent_id)))
            edge_elem.set("SPOT_TARGET_ID", str(int(spot_id)))
            # Edge time is placed halfway between the parent and child frames.
            edge_elem.set("EDGE_TIME", str(float(t) - 0.5))

    # Convert to XML string
    xml_str = ET.tostring(root, encoding="unicode")
    xml_str = minidom.parseString(xml_str).toprettyxml()

    return xml_str


def to_trackmate(
    config: "MainConfig",
    output_path: Optional[Union[Path, str]] = None,
    overwrite: bool = False,
) -> str:
    """
    Exports tracking results to TrackMate XML format.

    Parameters
    ----------
    config : MainConfig
        ULTrack configuration parameters.
    output_path : Optional[Path], optional
        Output file path, by default None
    overwrite : bool, optional
        Whether to overwrite the output file if it already exists, by default False

    Returns
    -------
    str
        A string representation of the XML in the TrackMate format.

    Raises
    ------
    FileExistsError
        If `output_path` exists and `overwrite` is False.
    """
    if output_path is not None:
        output_path = Path(output_path)
        # Fail before running the export rather than after it.
        if output_path.exists() and not overwrite:
            raise FileExistsError(
                f"File {output_path} already exists. Set overwrite=True to overwrite."
            )

    tracks_df, _ = to_tracks_layer(config)
    xml_str = tracks_layer_to_trackmate(tracks_df)

    # Save to file if output_path is provided
    if output_path is not None:
        # The XML declaration carries no encoding, which XML parsers read as UTF-8.
        output_path.write_text(xml_str, encoding="utf-8")

    return xml_str