Convert '-' to '_' in summary dataframe #215

Merged · 20 commits · Nov 20, 2023
Changes from 4 commits
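
This PR threads a new `separator` keyword through `skan.summarize` so that composite column names use `_` instead of `-`, and updates the docs, benchmarks, pipeline, and tests to pass `separator='_'` explicitly. A minimal sketch of the user-facing difference (the tiny skeleton image below is purely illustrative, and the `separator` keyword assumes a skan build that includes this change):

```python
import numpy as np
from skan import Skeleton, summarize

# Tiny illustrative skeleton: a 3-pixel horizontal line.
image = np.zeros((5, 5), dtype=bool)
image[2, 1:4] = True
skel = Skeleton(image)

# Old-style columns, still available during the deprecation period.
old = summarize(skel, separator='-')
print('branch-distance' in old.columns)   # True

# New-style columns, opted into explicitly throughout this PR.
new = summarize(skel, separator='_')
print('branch_distance' in new.columns)   # True
```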
2 changes: 1 addition & 1 deletion benchmarks/bench_skan.py
@@ -42,7 +42,7 @@ def bench_suite():
skel_obj = csr.Skeleton(skeleton)
times['skeleton object again'] = t_skeleton2[0]
with timer() as t_summary:
summary = csr.summarize(skel_obj)
summary = csr.summarize(skel_obj, separator='_')
times['compute per-skeleton statistics'] = t_summary[0]
return times

16 changes: 8 additions & 8 deletions doc/examples/visualizing_3d_skeletons.md
@@ -49,17 +49,17 @@ all_paths = [
```

```{code-cell} ipython3
paths_table = skan.summarize(skeleton)
paths_table = skan.summarize(skeleton, separator='_')
```

```{code-cell} ipython3
paths_table['path-id'] = np.arange(skeleton.n_paths)
paths_table['path_id'] = np.arange(skeleton.n_paths)
```

First, we color by random path ID, showing each path in a distinct color using the matplotlib "tab10" qualitative palette. (Coloring by path ID directly results in "bands" of nearby paths receiving the same color.)

```{code-cell} ipython3
paths_table['random-path-id'] = np.random.default_rng().permutation(skeleton.n_paths)
paths_table['random_path_id'] = np.random.default_rng().permutation(skeleton.n_paths)
```

```{code-cell} ipython3
@@ -70,7 +70,7 @@ skeleton_layer = viewer.add_shapes(
shape_type='path',
properties=paths_table,
edge_width=0.5,
edge_color='random-path-id',
edge_color='random_path_id',
edge_colormap='tab10',
)
```
@@ -85,9 +85,9 @@ napari.utils.nbscreenshot(viewer)
We can also demonstrate that most of these branches are in one skeleton, with a few stragglers around the edges, by coloring by skeleton ID:

```{code-cell} ipython3
skeleton_layer.edge_color = 'skeleton-id'
skeleton_layer.edge_color = 'skeleton_id'
# for now, we need to set the face color as well
skeleton_layer.face_color = 'skeleton-id'
skeleton_layer.face_color = 'skeleton_id'
```

```{code-cell} ipython3
@@ -99,10 +99,10 @@ napari.utils.nbscreenshot(viewer)
Finally, we can color the paths by a numerical property, such as their length.

```{code-cell} ipython3
skeleton_layer.edge_color = 'branch-distance'
skeleton_layer.edge_color = 'branch_distance'
skeleton_layer.edge_colormap = 'viridis'
# for now, we need to set the face color as well
skeleton_layer.face_color = 'branch-distance'
skeleton_layer.face_color = 'branch_distance'
skeleton_layer.face_colormap = 'viridis'
```

12 changes: 6 additions & 6 deletions doc/getting_started/getting_started.md
@@ -136,7 +136,7 @@ Let's go back to the red blood cell image to illustrate this graph.

```{code-cell} ipython3
from skan import Skeleton, summarize
branch_data = summarize(Skeleton(skeleton0, spacing=spacing_nm))
branch_data = summarize(Skeleton(skeleton0, spacing=spacing_nm), separator='_')
branch_data.head()
```

@@ -156,7 +156,7 @@ Next come the coordinates in natural space, the Euclidean distance between the p
This data table follows the "tidy data" paradigm, with one row per branch, which allows fast exploration of branch statistics. Here, for example, we plot the distribution of branch lengths according to branch type:

```{code-cell} ipython3
branch_data.hist(column='branch-distance', by='branch-type', bins=100);
branch_data.hist(column='branch_distance', by='branch_type', bins=100);
```

We can see that junction-to-junction branches tend to be longer than junction-to-endpoint and junction isolated branches, and that there are no cycles in our dataset.
@@ -165,7 +165,7 @@ We can also represent this visually with the `overlay_euclidean_skeleton`, which

```{code-cell} ipython3
draw.overlay_euclidean_skeleton_2d(image0, branch_data,
skeleton_color_source='branch-type');
skeleton_color_source='branch_type');
```

## 2. Comparing different skeletons
@@ -194,7 +194,7 @@ def skeletonize(images, spacings_nm):


skeletons = skeletonize(images, spacings_nm)
tables = [summarize(Skeleton(skeleton, spacing=spacing))
tables = [summarize(Skeleton(skeleton, spacing=spacing), separator='_')
for skeleton, spacing in zip(skeletons, spacings_nm)]

for filename, dataframe in zip(files, tables):
@@ -210,8 +210,8 @@ Now, however, we have a tidy data table with information about the sample origin
```{code-cell} ipython3
import seaborn as sns

j2j = (table[table['branch-type'] == 2].
rename(columns={'branch-distance':
j2j = (table[table['branch_type'] == 2].
rename(columns={'branch_distance':
'branch distance (nm)'}))
per_image = j2j.groupby('filename').median()
per_image['infected'] = ['infected' if 'inf' in fn else 'normal'
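
Downstream scripts that still index the old hyphenated names can be adapted with a one-line rename that mirrors, in reverse, the `df.rename(columns=...)` call this PR adds to `csr.summarize`. A sketch, assuming `branch_data` was built with `separator='_'` as in the updated docs above:

```python
# Map the new underscore-separated columns back to the old hyphenated style
# for code that has not been updated yet, e.g. 'branch_distance' -> 'branch-distance'.
legacy_branch_data = branch_data.rename(columns=lambda c: c.replace('_', '-'))
```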
20 changes: 18 additions & 2 deletions src/skan/csr.py
@@ -705,7 +705,8 @@ def summarize(
skel: Skeleton,
*,
value_is_height: bool = False,
find_main_branch: bool = False
find_main_branch: bool = False,
separator: str | None = None,
) -> pd.DataFrame:
"""Compute statistics for every skeleton and branch in ``skel``.

@@ -722,13 +723,27 @@
longest shortest path within a skeleton. This step is very expensive
as it involves computing the shortest paths between all pairs of branch
endpoints, so it is off by default.
separator : str, optional
Some column names are composite, e.g. ``'coord_src_0'``. The separator
argument allows users to configure which character is used to separate
the components. The default up to version 0.12 is '-', but will change
to '_' in version 0.13.

Returns
-------
summary : pandas.DataFrame
A summary of the branches including branch length, mean branch value,
branch euclidean distance, etc.
"""
if separator is None:
warnings.warn(
"separator in column name will change to _ in version 0.13; "
"to silence this warning, use `separator='-'` to maintain "
"current behavior and use `separator='_'` to switch to the "
"new default behavior.",
FutureWarning,
)
separator = '-'
summary = {}
ndim = skel.coordinates.shape[1]
_, skeleton_ids = csgraph.connected_components(skel.graph, directed=False)
@@ -780,6 +795,7 @@
if find_main_branch:
# define main branch as longest shortest path within a single skeleton
df['main'] = find_main_branches(df)
df.rename(columns=lambda s: s.replace('_', separator), inplace=True)
return df


@@ -1051,7 +1067,7 @@ def _simplify_graph(skel):
# don't reduce
return skel.graph, np.arange(skel.graph.shape[0])

summary = summarize(skel)
summary = summarize(skel, separator='_')
src = np.asarray(summary['node_id_src'])
dst = np.asarray(summary['node_id_dst'])
distance = np.asarray(summary['branch_distance'])
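
The deprecation path added above can be exercised directly. A sketch, assuming a skan build that includes this PR (the warning text matches the `warnings.warn` call in the diff; the skeleton image is illustrative):

```python
import warnings

import numpy as np
from skan import Skeleton, summarize

image = np.zeros((5, 5), dtype=bool)
image[2, 1:4] = True
skel = Skeleton(image)

# Leaving `separator` unset keeps the old hyphenated columns for now,
# but emits a FutureWarning about the new default coming in 0.13.
with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter('always')
    df = summarize(skel)

assert any(issubclass(w.category, FutureWarning) for w in caught)
assert 'branch-distance' in df.columns

# Passing the separator explicitly silences the warning.
assert 'branch_distance' in summarize(skel, separator='_').columns
```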
2 changes: 1 addition & 1 deletion src/skan/napari_skan.py
@@ -39,7 +39,7 @@ def labels_to_skeleton_shapes(
all_paths = [skeleton.path_coordinates(i) for i in range(skeleton.n_paths)]

# option to have main_path = True (or something) changing header
paths_table = summarize(skeleton)
paths_table = summarize(skeleton, separator='_')
layer_kwargs = {
'shape_type': 'path',
'edge_colormap': 'tab10',
4 changes: 3 additions & 1 deletion src/skan/pipe.py
@@ -65,7 +65,9 @@ def process_single_image(
)
quality = shape_index(image, sigma=pixel_smoothing_radius, mode='reflect')
skeleton = morphology.skeletonize(thresholded) * quality
framedata = csr.summarize(csr.Skeleton(skeleton, spacing=scale))
framedata = csr.summarize(
csr.Skeleton(skeleton, spacing=scale), separator='_'
)
framedata['squiggle'] = np.log2(
framedata['branch_distance'] / framedata['euclidean_distance']
)
33 changes: 21 additions & 12 deletions src/skan/test/test_csr.py
@@ -18,7 +18,9 @@ def _old_branch_statistics(
skel = csr.Skeleton(
skeleton_image, spacing=spacing, value_is_height=value_is_height
)
summary = csr.summarize(skel, value_is_height=value_is_height)
summary = csr.summarize(
skel, value_is_height=value_is_height, separator='_'
)
columns = ['node_id_src', 'node_id_dst', 'branch_distance', 'branch_type']
return summary[columns].to_numpy()

@@ -54,15 +56,15 @@ def test_skeleton1_stats():


def test_2skeletons():
df = csr.summarize(csr.Skeleton(skeleton2))
df = csr.summarize(csr.Skeleton(skeleton2), separator='_')
assert_almost_equal(np.unique(df['euclidean_distance']), np.sqrt([5, 10]))
assert_equal(np.unique(df['skeleton_id']), [0, 1])
assert_equal(np.bincount(df['branch_type']), [0, 4, 4])


def test_summarize_spacing():
df = csr.summarize(csr.Skeleton(skeleton2))
df2 = csr.summarize(csr.Skeleton(skeleton2, spacing=2))
df = csr.summarize(csr.Skeleton(skeleton2), separator='_')
df2 = csr.summarize(csr.Skeleton(skeleton2, spacing=2), separator='_')
assert_equal(np.array(df['node_id_src']), np.array(df2['node_id_src']))
assert_almost_equal(
np.array(df2['euclidean_distance']),
@@ -107,6 +109,7 @@ def test_topograph_summary():
stats = csr.summarize(
csr.Skeleton(topograph1d, spacing=2.5, value_is_height=True),
value_is_height=True,
separator='_',
)
assert stats.loc[0, 'euclidean_distance'] == 5.0
columns = ['coord_src_0', 'coord_src_1', 'coord_dst_0', 'coord_dst_1']
@@ -121,8 +124,8 @@ def test_junction_multiplicity():


def test_multiplicity_stats():
stats1 = csr.summarize(csr.Skeleton(skeleton0))
stats2 = csr.summarize(csr.Skeleton(skeleton0, spacing=2))
stats1 = csr.summarize(csr.Skeleton(skeleton0), separator='_')
stats2 = csr.summarize(csr.Skeleton(skeleton0, spacing=2), separator='_')
assert_almost_equal(
2 * stats1['branch_distance'].values,
stats2['branch_distance'].values
@@ -136,12 +139,12 @@ def test_multiplicity_stats():
def test_pixel_values():
image = np.random.random((45,))
expected = np.mean(image)
stats = csr.summarize(csr.Skeleton(image))
stats = csr.summarize(csr.Skeleton(image), separator='_')
assert_almost_equal(stats.loc[0, 'mean_pixel_value'], expected)


def test_tip_junction_edges():
stats1 = csr.summarize(csr.Skeleton(skeleton4))
stats1 = csr.summarize(csr.Skeleton(skeleton4), separator='_')
assert stats1.shape[0] == 3 # ensure all three branches are counted


@@ -208,7 +211,7 @@ def test_prune_paths(
) -> None:
"""Test pruning of paths."""
s = csr.Skeleton(skeleton, keep_images=True)
summary = summarize(s)
summary = summarize(s, separator='_')
indices_to_remove = summary.loc[summary['branch_type'] == prune_branch
].index
pruned = s.prune_paths(indices_to_remove)
@@ -219,7 +222,7 @@ def test_prune_paths_exception_single_point() -> None:
"""Test exceptions raised when pruning leaves a single point and Skeleton object
can not be created and returned."""
s = csr.Skeleton(skeleton0)
summary = summarize(s)
summary = summarize(s, separator='_')
indices_to_remove = summary.loc[summary['branch_type'] == 1].index
with pytest.raises(ValueError):
s.prune_paths(indices_to_remove)
@@ -229,7 +232,7 @@ def test_prune_paths_exception_invalid_path_index() -> None:
"""Test exceptions raised when trying to prune paths that do not exist in the summary. This can arise if skeletons
are not updated correctly during iterative pruning."""
s = csr.Skeleton(skeleton0)
summary = summarize(s)
summary = summarize(s, separator='_')
indices_to_remove = [6]
with pytest.raises(ValueError):
s.prune_paths(indices_to_remove)
@@ -314,6 +317,12 @@ def test_skeleton_path_image_no_keep_image():


def test_skeletonlabel():
stats = csr.summarize(csr.Skeleton(skeletonlabel))
stats = csr.summarize(csr.Skeleton(skeletonlabel), separator='_')
assert stats['mean_pixel_value'].max() == skeletonlabel.max()
assert stats['mean_pixel_value'].max() > 1


def test_default_summarize_separator():
with pytest.warns(FutureWarning, match='separator in column name'):
stats = csr.summarize(csr.Skeleton(skeletonlabel))
assert 'skeleton-id' in stats
2 changes: 1 addition & 1 deletion src/skan/test/test_draw.py
@@ -35,7 +35,7 @@ def test_skeleton(test_thresholded):

@pytest.fixture
def test_stats(test_skeleton):
stats = csr.summarize(csr.Skeleton(test_skeleton))
stats = csr.summarize(csr.Skeleton(test_skeleton), separator='_')
return stats


2 changes: 1 addition & 1 deletion src/skan/test/test_skeleton_class.py
@@ -101,7 +101,7 @@ def test_skeleton_summarize():
image = np.zeros(skeleton2.shape, dtype=float)
image[skeleton2] = 1 + np.random.random(np.sum(skeleton2))
skeleton = Skeleton(image)
summary = summarize(skeleton)
summary = summarize(skeleton, separator='_')
assert set(summary['skeleton_id']) == {0, 1}
assert (
np.all(summary['mean_pixel_value'] < 2)
2 changes: 1 addition & 1 deletion src/skan/test/test_summary_utils.py
@@ -6,7 +6,7 @@

def test_find_main():
skeleton = Skeleton(skeleton1)
summary_df = summarize(skeleton, find_main_branch=True)
summary_df = summarize(skeleton, find_main_branch=True, separator='_')

non_main_edge_start = [2, 1]
non_main_edge_finish = [3, 3]