Convert '-' to '_' in summary dataframe #215

Merged · 20 commits · Nov 20, 2023
Changes from 4 commits
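
This PR threads a new `separator` keyword through `skan.summarize` so that composite column names use `_` instead of `-`, and updates the docs, benchmarks, pipeline, and tests to pass `separator='_'` explicitly. A minimal sketch of the user-facing difference (the tiny skeleton image below is purely illustrative, and the `separator` keyword assumes a skan build that includes this change):

```python
import numpy as np
from skan import Skeleton, summarize

# Tiny illustrative skeleton: a 3-pixel horizontal line.
image = np.zeros((5, 5), dtype=bool)
image[2, 1:4] = True
skel = Skeleton(image)

# Old-style columns, still available during the deprecation period.
old = summarize(skel, separator='-')
print('branch-distance' in old.columns)   # True

# New-style columns, opted into explicitly throughout this PR.
new = summarize(skel, separator='_')
print('branch_distance' in new.columns)   # True
```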
2 changes: 1 addition & 1 deletion benchmarks/bench_skan.py
@@ -42,7 +42,7 @@ def bench_suite():
skel_obj = csr.Skeleton(skeleton)
times['skeleton object again'] = t_skeleton2[0]
with timer() as t_summary:
summary = csr.summarize(skel_obj)
summary = csr.summarize(skel_obj, separator='_')
times['compute per-skeleton statistics'] = t_summary[0]
return times

16 changes: 8 additions & 8 deletions doc/examples/visualizing_3d_skeletons.md
@@ -49,17 +49,17 @@ all_paths = [
```

```{code-cell} ipython3
paths_table = skan.summarize(skeleton)
paths_table = skan.summarize(skeleton, separator='_')
```

```{code-cell} ipython3
paths_table['path-id'] = np.arange(skeleton.n_paths)
paths_table['path_id'] = np.arange(skeleton.n_paths)
```

First, we color by random path ID, showing each path in a distinct color using the matplotlib "tab10" qualitative palette. (Coloring by path ID directly results in "bands" of nearby paths receiving the same color.)

```{code-cell} ipython3
paths_table['random-path-id'] = np.random.default_rng().permutation(skeleton.n_paths)
paths_table['random_path_id'] = np.random.default_rng().permutation(skeleton.n_paths)
```

```{code-cell} ipython3
@@ -70,7 +70,7 @@ skeleton_layer = viewer.add_shapes(
shape_type='path',
properties=paths_table,
edge_width=0.5,
edge_color='random-path-id',
edge_color='random_path_id',
edge_colormap='tab10',
)
```
@@ -85,9 +85,9 @@ napari.utils.nbscreenshot(viewer)
We can also demonstrate that most of these branches are in one skeleton, with a few stragglers around the edges, by coloring by skeleton ID:

```{code-cell} ipython3
skeleton_layer.edge_color = 'skeleton-id'
skeleton_layer.edge_color = 'skeleton_id'
# for now, we need to set the face color as well
skeleton_layer.face_color = 'skeleton-id'
skeleton_layer.face_color = 'skeleton_id'
```

```{code-cell} ipython3
@@ -99,10 +99,10 @@ napari.utils.nbscreenshot(viewer)
Finally, we can color the paths by a numerical property, such as their length.

```{code-cell} ipython3
skeleton_layer.edge_color = 'branch-distance'
skeleton_layer.edge_color = 'branch_distance'
skeleton_layer.edge_colormap = 'viridis'
# for now, we need to set the face color as well
skeleton_layer.face_color = 'branch-distance'
skeleton_layer.face_color = 'branch_distance'
skeleton_layer.face_colormap = 'viridis'
```

12 changes: 6 additions & 6 deletions doc/getting_started/getting_started.md
@@ -136,7 +136,7 @@ Let's go back to the red blood cell image to illustrate this graph.

```{code-cell} ipython3
from skan import Skeleton, summarize
branch_data = summarize(Skeleton(skeleton0, spacing=spacing_nm))
branch_data = summarize(Skeleton(skeleton0, spacing=spacing_nm), separator='_')
branch_data.head()
```

@@ -156,7 +156,7 @@ Next come the coordinates in natural space, the Euclidean distance between the p
This data table follows the "tidy data" paradigm, with one row per branch, which allows fast exploration of branch statistics. Here, for example, we plot the distribution of branch lengths according to branch type:

```{code-cell} ipython3
branch_data.hist(column='branch-distance', by='branch-type', bins=100);
branch_data.hist(column='branch_distance', by='branch_type', bins=100);
```

We can see that junction-to-junction branches tend to be longer than junction-to-endpoint and junction isolated branches, and that there are no cycles in our dataset.
@@ -165,7 +165,7 @@ We can also represent this visually with the `overlay_euclidean_skeleton`, which

```{code-cell} ipython3
draw.overlay_euclidean_skeleton_2d(image0, branch_data,
skeleton_color_source='branch-type');
skeleton_color_source='branch_type');
```

## 2. Comparing different skeletons
@@ -194,7 +194,7 @@ def skeletonize(images, spacings_nm):


skeletons = skeletonize(images, spacings_nm)
tables = [summarize(Skeleton(skeleton, spacing=spacing))
tables = [summarize(Skeleton(skeleton, spacing=spacing), separator='_')
for skeleton, spacing in zip(skeletons, spacings_nm)]

for filename, dataframe in zip(files, tables):
@@ -210,8 +210,8 @@ Now, however, we have a tidy data table with information about the sample origin
```{code-cell} ipython3
import seaborn as sns

j2j = (table[table['branch-type'] == 2].
rename(columns={'branch-distance':
j2j = (table[table['branch_type'] == 2].
rename(columns={'branch_distance':
'branch distance (nm)'}))
per_image = j2j.groupby('filename').median()
per_image['infected'] = ['infected' if 'inf' in fn else 'normal'
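
Downstream scripts that still index the old hyphenated names can be adapted with a one-line rename that mirrors, in reverse, the `df.rename(columns=...)` call this PR adds to `csr.summarize`. A sketch, assuming `branch_data` was built with `separator='_'` as in the updated docs above:

```python
# Map the new underscore-separated columns back to the old hyphenated style
# for code that has not been updated yet, e.g. 'branch_distance' -> 'branch-distance'.
legacy_branch_data = branch_data.rename(columns=lambda c: c.replace('_', '-'))
```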
20 changes: 18 additions & 2 deletions src/skan/csr.py
@@ -705,7 +705,8 @@ def summarize(
skel: Skeleton,
*,
value_is_height: bool = False,
find_main_branch: bool = False
find_main_branch: bool = False,
separator: str | None = None,
) -> pd.DataFrame:
"""Compute statistics for every skeleton and branch in ``skel``.

@@ -722,13 +723,27 @@
longest shortest path within a skeleton. This step is very expensive
as it involves computing the shortest paths between all pairs of branch
endpoints, so it is off by default.
separator : str, optional
Some column names are composite, e.g. ``'coord_src_0'``. The separator
argument allows users to configure which character is used to separate
the components. The default up to version 0.12 is '-', but will change
to '_' in version 0.13.

Returns
-------
summary : pandas.DataFrame
A summary of the branches including branch length, mean branch value,
branch euclidean distance, etc.
"""
if separator is None:
warnings.warn(
"separator in column name will change to _ in version 0.13; "
"to silence this warning, use `separator='-'` to maintain "
"current behavior and use `separator='_'` to switch to the "
"new default behavior.",
FutureWarning,
)
separator = '-'
summary = {}
ndim = skel.coordinates.shape[1]
_, skeleton_ids = csgraph.connected_components(skel.graph, directed=False)
@@ -780,6 +795,7 @@
if find_main_branch:
# define main branch as longest shortest path within a single skeleton
df['main'] = find_main_branches(df)
df.rename(columns=lambda s: s.replace('_', separator), inplace=True)
return df


@@ -1051,7 +1067,7 @@ def _simplify_graph(skel):
# don't reduce
return skel.graph, np.arange(skel.graph.shape[0])

summary = summarize(skel)
summary = summarize(skel, separator='_')
src = np.asarray(summary['node_id_src'])
dst = np.asarray(summary['node_id_dst'])
distance = np.asarray(summary['branch_distance'])
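
The deprecation path added above can be exercised directly. A sketch, assuming a skan build that includes this PR (the warning text matches the `warnings.warn` call in the diff; the skeleton image is illustrative):

```python
import warnings

import numpy as np
from skan import Skeleton, summarize

image = np.zeros((5, 5), dtype=bool)
image[2, 1:4] = True
skel = Skeleton(image)

# Leaving `separator` unset keeps the old hyphenated columns for now,
# but emits a FutureWarning about the new default coming in 0.13.
with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter('always')
    df = summarize(skel)

assert any(issubclass(w.category, FutureWarning) for w in caught)
assert 'branch-distance' in df.columns

# Passing the separator explicitly silences the warning.
assert 'branch_distance' in summarize(skel, separator='_').columns
```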
2 changes: 1 addition & 1 deletion src/skan/napari_skan.py
@@ -39,7 +39,7 @@ def labels_to_skeleton_shapes(
all_paths = [skeleton.path_coordinates(i) for i in range(skeleton.n_paths)]

# option to have main_path = True (or something) changing header
paths_table = summarize(skeleton)
paths_table = summarize(skeleton, separator='_')
layer_kwargs = {
'shape_type': 'path',
'edge_colormap': 'tab10',
4 changes: 3 additions & 1 deletion src/skan/pipe.py
@@ -65,7 +65,9 @@ def process_single_image(
)
quality = shape_index(image, sigma=pixel_smoothing_radius, mode='reflect')
skeleton = morphology.skeletonize(thresholded) * quality
framedata = csr.summarize(csr.Skeleton(skeleton, spacing=scale))
framedata = csr.summarize(
csr.Skeleton(skeleton, spacing=scale), separator='_'
)
framedata['squiggle'] = np.log2(
framedata['branch_distance'] / framedata['euclidean_distance']
)
33 changes: 21 additions & 12 deletions src/skan/test/test_csr.py
@@ -18,7 +18,9 @@ def _old_branch_statistics(
skel = csr.Skeleton(
skeleton_image, spacing=spacing, value_is_height=value_is_height
)
summary = csr.summarize(skel, value_is_height=value_is_height)
summary = csr.summarize(
skel, value_is_height=value_is_height, separator='_'
)
columns = ['node_id_src', 'node_id_dst', 'branch_distance', 'branch_type']
return summary[columns].to_numpy()

@@ -54,15 +56,15 @@ def test_skeleton1_stats():


def test_2skeletons():
df = csr.summarize(csr.Skeleton(skeleton2))
df = csr.summarize(csr.Skeleton(skeleton2), separator='_')
assert_almost_equal(np.unique(df['euclidean_distance']), np.sqrt([5, 10]))
assert_equal(np.unique(df['skeleton_id']), [0, 1])
assert_equal(np.bincount(df['branch_type']), [0, 4, 4])


def test_summarize_spacing():
df = csr.summarize(csr.Skeleton(skeleton2))
df2 = csr.summarize(csr.Skeleton(skeleton2, spacing=2))
df = csr.summarize(csr.Skeleton(skeleton2), separator='_')
df2 = csr.summarize(csr.Skeleton(skeleton2, spacing=2), separator='_')
assert_equal(np.array(df['node_id_src']), np.array(df2['node_id_src']))
assert_almost_equal(
np.array(df2['euclidean_distance']),
@@ -107,6 +109,7 @@ def test_topograph_summary():
stats = csr.summarize(
csr.Skeleton(topograph1d, spacing=2.5, value_is_height=True),
value_is_height=True,
separator='_',
)
assert stats.loc[0, 'euclidean_distance'] == 5.0
columns = ['coord_src_0', 'coord_src_1', 'coord_dst_0', 'coord_dst_1']
@@ -121,8 +124,8 @@ def test_junction_multiplicity():


def test_multiplicity_stats():
stats1 = csr.summarize(csr.Skeleton(skeleton0))
stats2 = csr.summarize(csr.Skeleton(skeleton0, spacing=2))
stats1 = csr.summarize(csr.Skeleton(skeleton0), separator='_')
stats2 = csr.summarize(csr.Skeleton(skeleton0, spacing=2), separator='_')
assert_almost_equal(
2 * stats1['branch_distance'].values,
stats2['branch_distance'].values
@@ -136,12 +139,12 @@ def test_multiplicity_stats():
def test_pixel_values():
image = np.random.random((45,))
expected = np.mean(image)
stats = csr.summarize(csr.Skeleton(image))
stats = csr.summarize(csr.Skeleton(image), separator='_')
assert_almost_equal(stats.loc[0, 'mean_pixel_value'], expected)


def test_tip_junction_edges():
stats1 = csr.summarize(csr.Skeleton(skeleton4))
stats1 = csr.summarize(csr.Skeleton(skeleton4), separator='_')
assert stats1.shape[0] == 3 # ensure all three branches are counted


@@ -208,7 +211,7 @@ def test_prune_paths(
) -> None:
"""Test pruning of paths."""
s = csr.Skeleton(skeleton, keep_images=True)
summary = summarize(s)
summary = summarize(s, separator='_')
indices_to_remove = summary.loc[summary['branch_type'] == prune_branch
].index
pruned = s.prune_paths(indices_to_remove)
@@ -219,7 +222,7 @@ def test_prune_paths_exception_single_point() -> None:
"""Test exceptions raised when pruning leaves a single point and Skeleton object
can not be created and returned."""
s = csr.Skeleton(skeleton0)
summary = summarize(s)
summary = summarize(s, separator='_')
indices_to_remove = summary.loc[summary['branch_type'] == 1].index
with pytest.raises(ValueError):
s.prune_paths(indices_to_remove)
@@ -229,7 +232,7 @@ def test_prune_paths_exception_invalid_path_index() -> None:
"""Test exceptions raised when trying to prune paths that do not exist in the summary. This can arise if skeletons
are not updated correctly during iterative pruning."""
s = csr.Skeleton(skeleton0)
summary = summarize(s)
summary = summarize(s, separator='_')
indices_to_remove = [6]
with pytest.raises(ValueError):
s.prune_paths(indices_to_remove)
@@ -314,6 +317,12 @@ def test_skeleton_path_image_no_keep_image():


def test_skeletonlabel():
stats = csr.summarize(csr.Skeleton(skeletonlabel))
stats = csr.summarize(csr.Skeleton(skeletonlabel), separator='_')
assert stats['mean_pixel_value'].max() == skeletonlabel.max()
assert stats['mean_pixel_value'].max() > 1


def test_default_summarize_separator():
with pytest.warns(FutureWarning, match='separator in column name'):
stats = csr.summarize(csr.Skeleton(skeletonlabel))
assert 'skeleton-id' in stats
2 changes: 1 addition & 1 deletion src/skan/test/test_draw.py
@@ -35,7 +35,7 @@ def test_skeleton(test_thresholded):

@pytest.fixture
def test_stats(test_skeleton):
stats = csr.summarize(csr.Skeleton(test_skeleton))
stats = csr.summarize(csr.Skeleton(test_skeleton), separator='_')
return stats


2 changes: 1 addition & 1 deletion src/skan/test/test_skeleton_class.py
@@ -101,7 +101,7 @@ def test_skeleton_summarize():
image = np.zeros(skeleton2.shape, dtype=float)
image[skeleton2] = 1 + np.random.random(np.sum(skeleton2))
skeleton = Skeleton(image)
summary = summarize(skeleton)
summary = summarize(skeleton, separator='_')
assert set(summary['skeleton_id']) == {0, 1}
assert (
np.all(summary['mean_pixel_value'] < 2)
2 changes: 1 addition & 1 deletion src/skan/test/test_summary_utils.py
@@ -6,7 +6,7 @@

def test_find_main():
skeleton = Skeleton(skeleton1)
summary_df = summarize(skeleton, find_main_branch=True)
summary_df = summarize(skeleton, find_main_branch=True, separator='_')

non_main_edge_start = [2, 1]
non_main_edge_finish = [3, 3]