From aa17db025d60448c8a4548283f6879fb34c0faa8 Mon Sep 17 00:00:00 2001 From: Talmo Pereira Date: Mon, 21 Dec 2020 19:28:06 -0500 Subject: [PATCH 01/19] Add breaking test. --- tests/nn/data/test_providers.py | 43 +++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/tests/nn/data/test_providers.py b/tests/nn/data/test_providers.py index 114259b77..d0beadba3 100644 --- a/tests/nn/data/test_providers.py +++ b/tests/nn/data/test_providers.py @@ -134,3 +134,46 @@ def test_video_reader_hdf5(): assert example["raw_image_size"].dtype == tf.int32 np.testing.assert_array_equal(example["raw_image_size"], (512, 512, 1)) + + +def test_labels_reader_multi_size(): + # Create some fake data using two different size videos. + skeleton = sleap.Skeleton.from_names_and_edge_inds(["A"]) + labels = sleap.Labels( + [ + sleap.LabeledFrame( + frame_idx=0, + video=sleap.Video.from_filename( + TEST_SMALL_ROBOT_MP4_FILE, grayscale=True + ), + instances=[ + sleap.Instance.from_pointsarray( + np.array([[128, 128]]), skeleton=skeleton + ) + ], + ), + sleap.LabeledFrame( + frame_idx=0, + video=sleap.Video.from_filename( + TEST_H5_FILE, dataset="/box", input_format="channels_first" + ), + instances=[ + sleap.Instance.from_pointsarray( + np.array([[128, 128]]), skeleton=skeleton + ) + ], + ), + ] + ) + + # Create a loader for those labels. + labels_reader = providers.LabelsReader(labels) + ds = labels_reader.make_dataset() + ds_iter = iter(ds) + + # Check shapes of individual samples. + example = next(ds_iter) + assert example["image"].shape == (320, 560, 1) + + example = next(ds_iter) + assert example["image"].shape == (512, 512, 1) From 0a2f43d7f3dbbcc9ffe62477d97b0fa16a74d9cb Mon Sep 17 00:00:00 2001 From: Arie Matsliah Date: Thu, 28 Jan 2021 14:01:00 -0800 Subject: [PATCH 02/19] wip --- sleap/nn/data/providers.py | 1 - sleap/nn/data/resizing.py | 88 +++++++++++++++++++++++++++++++++ tests/nn/data/test_providers.py | 4 ++ 3 files changed, 92 insertions(+), 1 deletion(-) diff --git a/sleap/nn/data/providers.py b/sleap/nn/data/providers.py index 8998ee7ee..bb0df192d 100644 --- a/sleap/nn/data/providers.py +++ b/sleap/nn/data/providers.py @@ -167,7 +167,6 @@ def fetch_lf(ind): [ind], [image_dtype, tf.int32, tf.float32, tf.int32, tf.int64, tf.int32], ) - image = tf.ensure_shape(image, test_image.shape) instances = tf.ensure_shape(instances, tf.TensorShape([None, None, 2])) skeleton_inds = tf.ensure_shape(skeleton_inds, tf.TensorShape([None])) diff --git a/sleap/nn/data/resizing.py b/sleap/nn/data/resizing.py index 95ff5cfc4..ce25cb8a8 100644 --- a/sleap/nn/data/resizing.py +++ b/sleap/nn/data/resizing.py @@ -251,6 +251,94 @@ def resize(example): return ds_output +@attr.s(auto_attribs=True) +class SizeEqualizer: + """Data transformer that ensures output images have uniform shape by resizing/padding smaller images. + + Attributes: + image_key: String name of the key containing the images to resize. + scale_key: String name of the key containing the scale of the images. + points_key: String name of the key containing points to adjust for the resizing + operation. + keep_full_image: If True, keeps the (original size) full image in the examples. + This is useful for multi-scale inference. + full_image_key: String name of the key containing the full images. + """ + + image_key: Text = "image" + scale_key: Text = "scale" + points_key: Optional[Text] = "instances" + keep_full_image: bool = False + full_image_key: Text = "full_image" + + @property + def input_keys(self) -> List[Text]: + """Return the keys that incoming elements are expected to have.""" + input_keys = [self.image_key, self.scale_key] + if self.points_key is not None: + input_keys.append(self.points_key) + return input_keys + + @property + def output_keys(self) -> List[Text]: + """Return the keys that outgoing elements will have.""" + output_keys = self.input_keys + if self.keep_full_image: + output_keys.append(self.full_image_key) + return output_keys + + def transform_dataset(self, ds_input: tf.data.Dataset) -> tf.data.Dataset: + """Transform a dataset with potentially different size images into one with equal sized images. + + Args: + ds_input: A dataset with the image specified in the `image_key` attribute, + points specified in the `points_key` attribute, and the "scale" key for + tracking scaling transformations. + + Returns: + A `tf.data.Dataset` with elements containing the same images and points of equal size. + + If the `keep_full_image` attribute is True, a key specified by + `full_image_key` will be added with the to the example containing the image + before any processing. + """ + + + # determine max height and width + shapes = [tf.shape(e[self.image_key]) for e in ds_input] + max_height = max([s[-3] for s in shapes]) + max_width = max([s[-2] for s in shapes]) + + # mapping function: match to max height width by resizing and padding bottom/right accordingly + def match_to_max_height_and_width(example): + if self.keep_full_image: + example[self.full_image_key] = example[self.image_key] + + current_shape = tf.shape(example[self.image_key]) + if current_shape[-3] < max_height or current_shape[-2] < max_width: + # match size + example[self.image_key] = tf.image.resize_with_pad( + example[self.image_key], + target_height=max_height, + target_width=max_width, + method=ResizeMethod.BILINEAR, + antialias=False + ) + + # ??? + if self.points_key: + example[self.points_key] = example[self.points_key] * self.scale + example[self.scale_key] = example[self.scale_key] * self.scale + + return example + + + ds_output = ds_input.map( + match_to_max_height_and_width, num_parallel_calls=tf.data.experimental.AUTOTUNE + ) + return ds_output + + @attr.s(auto_attribs=True) class PointsRescaler: """Transformer to apply or invert scaling operations on points.""" diff --git a/tests/nn/data/test_providers.py b/tests/nn/data/test_providers.py index d0beadba3..0ed7d7af3 100644 --- a/tests/nn/data/test_providers.py +++ b/tests/nn/data/test_providers.py @@ -177,3 +177,7 @@ def test_labels_reader_multi_size(): example = next(ds_iter) assert example["image"].shape == (512, 512, 1) + + from sleap.nn.data.resizing import SizeEqualizer + s = SizeEqualizer() + dso = s.transform_dataset(ds) From 5ca2efb1ef819455bf745a46e2add13a847d9217 Mon Sep 17 00:00:00 2001 From: Arie Matsliah Date: Mon, 1 Feb 2021 09:09:05 -0800 Subject: [PATCH 03/19] wip --- sleap/nn/data/pipelines.py | 2 +- sleap/nn/data/resizing.py | 61 +++++++++++++++++++++------------ tests/nn/data/test_providers.py | 15 -------- 3 files changed, 40 insertions(+), 38 deletions(-) diff --git a/sleap/nn/data/pipelines.py b/sleap/nn/data/pipelines.py index eb7b51c2f..b37caa714 100644 --- a/sleap/nn/data/pipelines.py +++ b/sleap/nn/data/pipelines.py @@ -123,7 +123,7 @@ def from_blocks( """Create a pipeline from a sequence of providers and transformers. Args: - sequence: List or tuple of providers and transformer instances. + blocks: List or tuple of providers and transformer instances. Returns: An instantiated pipeline with all blocks chained. diff --git a/sleap/nn/data/resizing.py b/sleap/nn/data/resizing.py index ce25cb8a8..a8a90403d 100644 --- a/sleap/nn/data/resizing.py +++ b/sleap/nn/data/resizing.py @@ -263,6 +263,8 @@ class SizeEqualizer: keep_full_image: If True, keeps the (original size) full image in the examples. This is useful for multi-scale inference. full_image_key: String name of the key containing the full images. + max_image_height: int The target height to which all smaller images will be resized/padded to. + max_image_width: int The target width to which all smaller images will be resized/padded to. """ image_key: Text = "image" @@ -270,6 +272,8 @@ class SizeEqualizer: points_key: Optional[Text] = "instances" keep_full_image: bool = False full_image_key: Text = "full_image" + max_image_height: int = None + max_image_width: int = None @property def input_keys(self) -> List[Text]: @@ -303,38 +307,51 @@ def transform_dataset(self, ds_input: tf.data.Dataset) -> tf.data.Dataset: before any processing. """ - - # determine max height and width - shapes = [tf.shape(e[self.image_key]) for e in ds_input] - max_height = max([s[-3] for s in shapes]) - max_width = max([s[-2] for s in shapes]) - # mapping function: match to max height width by resizing and padding bottom/right accordingly - def match_to_max_height_and_width(example): + def resize_and_pad(example): + image = example[self.image_key] if self.keep_full_image: - example[self.full_image_key] = example[self.image_key] - - current_shape = tf.shape(example[self.image_key]) - if current_shape[-3] < max_height or current_shape[-2] < max_width: - # match size - example[self.image_key] = tf.image.resize_with_pad( - example[self.image_key], - target_height=max_height, - target_width=max_width, - method=ResizeMethod.BILINEAR, + example[self.full_image_key] = image + + current_shape = tf.shape(image) + if current_shape[-3] < self.max_image_height or current_shape[-2] < self.max_image_width: + # Calculate target height and width for resizing the image (no padding yet) + hratio = self.max_image_height / tf.cast(current_shape[-3], tf.float32) + wratio = self.max_image_width / tf.cast(current_shape[-2], tf.float32) + if hratio > wratio: + target_height=tf.cast(tf.cast(current_shape[-3], tf.float32) * wratio, tf.int32) + target_width=self.max_image_width + example[self.scale_key] = example[self.scale_key] * wratio + else: + target_height=self.max_image_height + target_width=tf.cast(tf.cast(current_shape[-2], tf.float32) * hratio, tf.int32) + example[self.scale_key] = example[self.scale_key] * hratio + # Resize the image to fill one of the dimensions by preserving aspect ratio + image = tf.image.resize_with_pad( + image, + target_height=target_height, + target_width=target_width, + method=tf.image.ResizeMethod.BILINEAR, antialias=False ) - - # ??? + # Pad the image on bottom/right with zeroes to match specified dimensions + image = tf.image.pad_to_bounding_box( + image, + offset_height=0, + offset_width=0, + target_height=self.max_image_height, + target_width=self.max_image_width + ) + example[self.image_key] = tf.cast(image, example[self.image_key].dtype) + # Scale the instance points accordingly if self.points_key: - example[self.points_key] = example[self.points_key] * self.scale - example[self.scale_key] = example[self.scale_key] * self.scale + example[self.points_key] = example[self.points_key] * example[self.scale_key] return example ds_output = ds_input.map( - match_to_max_height_and_width, num_parallel_calls=tf.data.experimental.AUTOTUNE + resize_and_pad, num_parallel_calls=tf.data.experimental.AUTOTUNE ) return ds_output diff --git a/tests/nn/data/test_providers.py b/tests/nn/data/test_providers.py index 0ed7d7af3..25a55d76a 100644 --- a/tests/nn/data/test_providers.py +++ b/tests/nn/data/test_providers.py @@ -166,18 +166,3 @@ def test_labels_reader_multi_size(): ] ) - # Create a loader for those labels. - labels_reader = providers.LabelsReader(labels) - ds = labels_reader.make_dataset() - ds_iter = iter(ds) - - # Check shapes of individual samples. - example = next(ds_iter) - assert example["image"].shape == (320, 560, 1) - - example = next(ds_iter) - assert example["image"].shape == (512, 512, 1) - - from sleap.nn.data.resizing import SizeEqualizer - s = SizeEqualizer() - dso = s.transform_dataset(ds) From be85e69ea7598eb52bb372e6c0ab2dbe53b46c9c Mon Sep 17 00:00:00 2001 From: Arie Matsliah Date: Mon, 1 Feb 2021 10:15:38 -0800 Subject: [PATCH 04/19] wip --- sleap/nn/data/resizing.py | 15 ++++--- tests/nn/data/test_providers.py | 72 +++++++++++++++++++++++++++++++++ 2 files changed, 82 insertions(+), 5 deletions(-) diff --git a/sleap/nn/data/resizing.py b/sleap/nn/data/resizing.py index a8a90403d..19facdb4d 100644 --- a/sleap/nn/data/resizing.py +++ b/sleap/nn/data/resizing.py @@ -252,7 +252,7 @@ def resize(example): @attr.s(auto_attribs=True) -class SizeEqualizer: +class SizeMatcher: """Data transformer that ensures output images have uniform shape by resizing/padding smaller images. Attributes: @@ -314,7 +314,14 @@ def resize_and_pad(example): example[self.full_image_key] = image current_shape = tf.shape(image) - if current_shape[-3] < self.max_image_height or current_shape[-2] < self.max_image_width: + + if (current_shape[-3] == self.max_image_height and current_shape[-2] == self.max_image_width) \ + or current_shape[-3] > self.max_image_height \ + or current_shape[-2] > self.max_image_width: + # If image shape matches target shape, or larger than target shape in any dimension, don't do anything + return example + + elif current_shape[-3] < self.max_image_height or current_shape[-2] < self.max_image_width: # Calculate target height and width for resizing the image (no padding yet) hratio = self.max_image_height / tf.cast(current_shape[-3], tf.float32) wratio = self.max_image_width / tf.cast(current_shape[-2], tf.float32) @@ -330,9 +337,7 @@ def resize_and_pad(example): image = tf.image.resize_with_pad( image, target_height=target_height, - target_width=target_width, - method=tf.image.ResizeMethod.BILINEAR, - antialias=False + target_width=target_width ) # Pad the image on bottom/right with zeroes to match specified dimensions image = tf.image.pad_to_bounding_box( diff --git a/tests/nn/data/test_providers.py b/tests/nn/data/test_providers.py index 25a55d76a..294d4fb59 100644 --- a/tests/nn/data/test_providers.py +++ b/tests/nn/data/test_providers.py @@ -1,6 +1,7 @@ import numpy as np import tensorflow as tf from sleap.nn.system import use_cpu_only +from sleap.nn.data.resizing import SizeMatcher use_cpu_only() # hide GPUs for test from tests.fixtures.videos import TEST_H5_FILE, TEST_SMALL_ROBOT_MP4_FILE @@ -166,3 +167,74 @@ def test_labels_reader_multi_size(): ] ) + # Create a loader for those labels. + labels_reader = providers.LabelsReader(labels) + ds = labels_reader.make_dataset() + ds_iter = iter(ds) + + # Check LabelReader can provide different shapes of individual samples + assert next(ds_iter)["image"].shape == (320, 560, 1) + assert next(ds_iter)["image"].shape == (512, 512, 1) + + # Check SizeMatcher is a no-op when target dims is not strictly larger than actual image dims + no_op_size_matcher = SizeMatcher(max_image_height=500, max_image_width=500) + transform_iter = iter(no_op_size_matcher.transform_dataset(ds)) + assert next(transform_iter)["image"].shape == (320, 560, 1) + assert next(transform_iter)["image"].shape == (512, 512, 1) + # Variant 2 + no_op_size_matcher = SizeMatcher(max_image_height=320, max_image_width=560) + transform_iter = iter(no_op_size_matcher.transform_dataset(ds)) + assert next(transform_iter)["image"].shape == (320, 560, 1) + assert next(transform_iter)["image"].shape == (512, 512, 1) + # Variant 3 + no_op_size_matcher = SizeMatcher(max_image_height=1320, max_image_width=511) + transform_iter = iter(no_op_size_matcher.transform_dataset(ds)) + assert next(transform_iter)["image"].shape == (320, 560, 1) + assert next(transform_iter)["image"].shape == (512, 512, 1) + # Variant 4 + no_op_size_matcher = SizeMatcher(max_image_height=319, max_image_width=1560) + transform_iter = iter(no_op_size_matcher.transform_dataset(ds)) + assert next(transform_iter)["image"].shape == (320, 560, 1) + assert next(transform_iter)["image"].shape == (512, 512, 1) + + # Check SizeMatcher when target is larger in both dimensions + size_matcher = SizeMatcher(max_image_height=750, max_image_width=750) + transform_iter = iter(size_matcher.transform_dataset(ds)) + im1 = next(transform_iter)["image"] + assert im1.shape == (750, 750, 1) + im2 = next(transform_iter)["image"] + assert im2.shape == (750, 750, 1) + # Check padding is correct + + # Check SizeMatcher when target is larger in one dimension + size_matcher = SizeMatcher(max_image_height=750, max_image_width=560) + transform_iter = iter(size_matcher.transform_dataset(ds)) + im1 = next(transform_iter)["image"] + assert im1.shape == (750, 560, 1) + im2 = next(transform_iter)["image"] + assert im2.shape == (750, 560, 1) + # Check padding is correct + + ''' + dso_iter = iter(dso) + # Check shapes of individual samples. + example = next(dso_iter) + image = example["image"] + + sleap.nn.viz.plot_img(example["image"]) + plt.savefig("example1_post.png") + + assert example["image"].shape == (1000, 1500, 1) + + example = next(dso_iter) + sleap.nn.viz.plot_img(example["image"]) + plt.savefig("example2_post.png") + + assert example["image"].shape == (1000, 1500, 1) + ''' + assert 0 == 1 + + import matplotlib.pyplot as plt + sleap.nn.viz.plot_img(example["image"]) + plt.savefig("example1_pre.png") + From 6b4ded2525dfdabdc49264d554ce7776c2d33cfe Mon Sep 17 00:00:00 2001 From: Arie Matsliah Date: Mon, 1 Feb 2021 10:27:55 -0800 Subject: [PATCH 05/19] tests --- tests/nn/data/test_providers.py | 40 ++++++++++----------------------- 1 file changed, 12 insertions(+), 28 deletions(-) diff --git a/tests/nn/data/test_providers.py b/tests/nn/data/test_providers.py index 294d4fb59..fd50b83f3 100644 --- a/tests/nn/data/test_providers.py +++ b/tests/nn/data/test_providers.py @@ -202,39 +202,23 @@ def test_labels_reader_multi_size(): transform_iter = iter(size_matcher.transform_dataset(ds)) im1 = next(transform_iter)["image"] assert im1.shape == (750, 750, 1) + # Check padding is on the bottom + for y in range(700, 750): + for x in range(0, 750): + assert im1[y][x] == 0 im2 = next(transform_iter)["image"] assert im2.shape == (750, 750, 1) - # Check padding is correct + # Check SizeMatcher when target is larger in one dimension - size_matcher = SizeMatcher(max_image_height=750, max_image_width=560) + size_matcher = SizeMatcher(max_image_height=512, max_image_width=750) transform_iter = iter(size_matcher.transform_dataset(ds)) im1 = next(transform_iter)["image"] - assert im1.shape == (750, 560, 1) + assert im1.shape == (512, 750, 1) im2 = next(transform_iter)["image"] - assert im2.shape == (750, 560, 1) - # Check padding is correct - - ''' - dso_iter = iter(dso) - # Check shapes of individual samples. - example = next(dso_iter) - image = example["image"] - - sleap.nn.viz.plot_img(example["image"]) - plt.savefig("example1_post.png") - - assert example["image"].shape == (1000, 1500, 1) - - example = next(dso_iter) - sleap.nn.viz.plot_img(example["image"]) - plt.savefig("example2_post.png") - - assert example["image"].shape == (1000, 1500, 1) - ''' - assert 0 == 1 - - import matplotlib.pyplot as plt - sleap.nn.viz.plot_img(example["image"]) - plt.savefig("example1_pre.png") + assert im2.shape == (512, 750, 1) + # Check padding is on the right + for y in range(0, 512): + for x in range(512, 750): + assert im2[y][x] == 0 From c82cb18a0ed36564ecfc6f99e177e1a9e21f54b4 Mon Sep 17 00:00:00 2001 From: Arie Matsliah Date: Mon, 1 Feb 2021 14:10:25 -0800 Subject: [PATCH 06/19] fix normalization test --- sleap/nn/data/providers.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/sleap/nn/data/providers.py b/sleap/nn/data/providers.py index bb0df192d..dab6a1b17 100644 --- a/sleap/nn/data/providers.py +++ b/sleap/nn/data/providers.py @@ -125,10 +125,10 @@ def make_dataset( "skeleton_inds": Tensor of shape (n_instances,) of dtype tf.int32 that specifies the index of the skeleton used for each instance. """ - # Grab an image to test for the dtype. - test_lf = self.labels[0] - test_image = tf.convert_to_tensor(test_lf.image) - image_dtype = test_image.dtype + # Grab the first image to capture dtype and number of color channels. + first_image = tf.convert_to_tensor(self.labels[0].image) + image_dtype = first_image.dtype + image_num_channels = first_image.shape[-1] def py_fetch_lf(ind): """Local function that will not be autographed.""" @@ -167,6 +167,7 @@ def fetch_lf(ind): [ind], [image_dtype, tf.int32, tf.float32, tf.int32, tf.int64, tf.int32], ) + image = tf.ensure_shape(image, (None, None, image_num_channels)) instances = tf.ensure_shape(instances, tf.TensorShape([None, None, 2])) skeleton_inds = tf.ensure_shape(skeleton_inds, tf.TensorShape([None])) From 0f24ac91c559c0b63f3d138184f85e8666308d33 Mon Sep 17 00:00:00 2001 From: Arie Matsliah Date: Mon, 1 Feb 2021 21:02:04 -0800 Subject: [PATCH 07/19] support smaller target size --- sleap/nn/data/resizing.py | 11 ++-- tests/nn/data/test_providers.py | 48 ------------------ tests/nn/data/test_resizing.py | 89 +++++++++++++++++++++++++++++++++ 3 files changed, 93 insertions(+), 55 deletions(-) diff --git a/sleap/nn/data/resizing.py b/sleap/nn/data/resizing.py index 19facdb4d..749c46856 100644 --- a/sleap/nn/data/resizing.py +++ b/sleap/nn/data/resizing.py @@ -315,21 +315,18 @@ def resize_and_pad(example): current_shape = tf.shape(image) - if (current_shape[-3] == self.max_image_height and current_shape[-2] == self.max_image_width) \ - or current_shape[-3] > self.max_image_height \ - or current_shape[-2] > self.max_image_width: - # If image shape matches target shape, or larger than target shape in any dimension, don't do anything - return example - - elif current_shape[-3] < self.max_image_height or current_shape[-2] < self.max_image_width: + # Only apply this transform if image shape differs from target + if current_shape[-3] != self.max_image_height or current_shape[-2] != self.max_image_width: # Calculate target height and width for resizing the image (no padding yet) hratio = self.max_image_height / tf.cast(current_shape[-3], tf.float32) wratio = self.max_image_width / tf.cast(current_shape[-2], tf.float32) if hratio > wratio: + # The bottleneck is width, scale to fit width first then pad to height target_height=tf.cast(tf.cast(current_shape[-3], tf.float32) * wratio, tf.int32) target_width=self.max_image_width example[self.scale_key] = example[self.scale_key] * wratio else: + # The bottleneck is height, scale to fit height first then pad to width target_height=self.max_image_height target_width=tf.cast(tf.cast(current_shape[-2], tf.float32) * hratio, tf.int32) example[self.scale_key] = example[self.scale_key] * hratio diff --git a/tests/nn/data/test_providers.py b/tests/nn/data/test_providers.py index fd50b83f3..519759f39 100644 --- a/tests/nn/data/test_providers.py +++ b/tests/nn/data/test_providers.py @@ -1,7 +1,6 @@ import numpy as np import tensorflow as tf from sleap.nn.system import use_cpu_only -from sleap.nn.data.resizing import SizeMatcher use_cpu_only() # hide GPUs for test from tests.fixtures.videos import TEST_H5_FILE, TEST_SMALL_ROBOT_MP4_FILE @@ -175,50 +174,3 @@ def test_labels_reader_multi_size(): # Check LabelReader can provide different shapes of individual samples assert next(ds_iter)["image"].shape == (320, 560, 1) assert next(ds_iter)["image"].shape == (512, 512, 1) - - # Check SizeMatcher is a no-op when target dims is not strictly larger than actual image dims - no_op_size_matcher = SizeMatcher(max_image_height=500, max_image_width=500) - transform_iter = iter(no_op_size_matcher.transform_dataset(ds)) - assert next(transform_iter)["image"].shape == (320, 560, 1) - assert next(transform_iter)["image"].shape == (512, 512, 1) - # Variant 2 - no_op_size_matcher = SizeMatcher(max_image_height=320, max_image_width=560) - transform_iter = iter(no_op_size_matcher.transform_dataset(ds)) - assert next(transform_iter)["image"].shape == (320, 560, 1) - assert next(transform_iter)["image"].shape == (512, 512, 1) - # Variant 3 - no_op_size_matcher = SizeMatcher(max_image_height=1320, max_image_width=511) - transform_iter = iter(no_op_size_matcher.transform_dataset(ds)) - assert next(transform_iter)["image"].shape == (320, 560, 1) - assert next(transform_iter)["image"].shape == (512, 512, 1) - # Variant 4 - no_op_size_matcher = SizeMatcher(max_image_height=319, max_image_width=1560) - transform_iter = iter(no_op_size_matcher.transform_dataset(ds)) - assert next(transform_iter)["image"].shape == (320, 560, 1) - assert next(transform_iter)["image"].shape == (512, 512, 1) - - # Check SizeMatcher when target is larger in both dimensions - size_matcher = SizeMatcher(max_image_height=750, max_image_width=750) - transform_iter = iter(size_matcher.transform_dataset(ds)) - im1 = next(transform_iter)["image"] - assert im1.shape == (750, 750, 1) - # Check padding is on the bottom - for y in range(700, 750): - for x in range(0, 750): - assert im1[y][x] == 0 - im2 = next(transform_iter)["image"] - assert im2.shape == (750, 750, 1) - - - # Check SizeMatcher when target is larger in one dimension - size_matcher = SizeMatcher(max_image_height=512, max_image_width=750) - transform_iter = iter(size_matcher.transform_dataset(ds)) - im1 = next(transform_iter)["image"] - assert im1.shape == (512, 750, 1) - im2 = next(transform_iter)["image"] - assert im2.shape == (512, 750, 1) - # Check padding is on the right - for y in range(0, 512): - for x in range(512, 750): - assert im2[y][x] == 0 - diff --git a/tests/nn/data/test_resizing.py b/tests/nn/data/test_resizing.py index 6510dc0a6..f2eb2a3da 100644 --- a/tests/nn/data/test_resizing.py +++ b/tests/nn/data/test_resizing.py @@ -5,8 +5,13 @@ use_cpu_only() # hide GPUs for test +import sleap +from sleap.nn.system import use_cpu_only; use_cpu_only() # hide GPUs for test from sleap.nn.data import resizing from sleap.nn.data import providers +from sleap.nn.data.resizing import SizeMatcher + +from tests.fixtures.videos import TEST_H5_FILE, TEST_SMALL_ROBOT_MP4_FILE def test_find_padding_for_stride(): @@ -117,3 +122,87 @@ def test_resizer_from_config(): resizer = resizing.Resizer.from_config( config=resizing.PreprocessingConfig(input_scaling=0.5, pad_to_stride=None) ) + +def test_size_matcher(): + # Create some fake data using two different size videos. + skeleton = sleap.Skeleton.from_names_and_edge_inds(["A"]) + labels = sleap.Labels( + [ + sleap.LabeledFrame( + frame_idx=0, + video=sleap.Video.from_filename( + TEST_SMALL_ROBOT_MP4_FILE, grayscale=True + ), + instances=[ + sleap.Instance.from_pointsarray( + np.array([[128, 128]]), skeleton=skeleton + ) + ], + ), + sleap.LabeledFrame( + frame_idx=0, + video=sleap.Video.from_filename( + TEST_H5_FILE, dataset="/box", input_format="channels_first" + ), + instances=[ + sleap.Instance.from_pointsarray( + np.array([[128, 128]]), skeleton=skeleton + ) + ], + ), + ] + ) + + # Create a loader for those labels. + labels_reader = providers.LabelsReader(labels) + ds = labels_reader.make_dataset() + ds_iter = iter(ds) + assert next(ds_iter)["image"].shape == (320, 560, 1) + assert next(ds_iter)["image"].shape == (512, 512, 1) + + def check_padding(image, from_y, to_y, from_x, to_x): + for y in range(from_y, to_y): + for x in range(from_x, to_x): + assert image[y][x] == 0 + + # Check SizeMatcher when target dims is not strictly larger than actual image dims + size_matcher = SizeMatcher(max_image_height=560, max_image_width=560) + transform_iter = iter(size_matcher.transform_dataset(ds)) + im1 = next(transform_iter)["image"] + assert im1.shape == (560, 560, 1) + # padding should be on the bottom + check_padding(im1, 321, 560, 0, 560) + im2 = next(transform_iter)["image"] + assert im2.shape == (560, 560, 1) + + # Variant 2 + size_matcher = SizeMatcher(max_image_height=320, max_image_width=560) + transform_iter = iter(size_matcher.transform_dataset(ds)) + im1 = next(transform_iter)["image"] + assert im1.shape == (320, 560, 1) + im2 = next(transform_iter)["image"] + assert im2.shape == (320, 560, 1) + # padding should be on the right + check_padding(im2, 0, 320, 321, 560) + + # Check SizeMatcher when target is 'max' in both dimensions + size_matcher = SizeMatcher(max_image_height=512, max_image_width=560) + transform_iter = iter(size_matcher.transform_dataset(ds)) + im1 = next(transform_iter)["image"] + assert im1.shape == (512, 560, 1) + # Check padding is on the bottom + check_padding(im2, 320, 512, 0, 560) + im2 = next(transform_iter)["image"] + assert im2.shape == (512, 560, 1) + # Check padding is on the right + check_padding(im2, 0, 512, 512, 560) + + # Check SizeMatcher when target is larger in both dimensions + size_matcher = SizeMatcher(max_image_height=750, max_image_width=750) + transform_iter = iter(size_matcher.transform_dataset(ds)) + im1 = next(transform_iter)["image"] + assert im1.shape == (750, 750, 1) + # Check padding is on the bottom + check_padding(im1, 700, 750, 0, 750) + im2 = next(transform_iter)["image"] + assert im2.shape == (750, 750, 1) From e39c6fe9bf2333bf2a6a25bc5dc7e8f5aa60d803 Mon Sep 17 00:00:00 2001 From: Arie Matsliah Date: Mon, 1 Feb 2021 21:02:26 -0800 Subject: [PATCH 08/19] support smaller target size --- tests/nn/data/test_resizing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/nn/data/test_resizing.py b/tests/nn/data/test_resizing.py index f2eb2a3da..5f5b01d20 100644 --- a/tests/nn/data/test_resizing.py +++ b/tests/nn/data/test_resizing.py @@ -191,7 +191,7 @@ def check_padding(image, from_y, to_y, from_x, to_x): im1 = next(transform_iter)["image"] assert im1.shape == (512, 560, 1) # Check padding is on the bottom - check_padding(im2, 320, 512, 0, 560) + check_padding(im1, 320, 512, 0, 560) im2 = next(transform_iter)["image"] assert im2.shape == (512, 560, 1) # Check padding is on the right From 0f8dd425c8ef1a7237aca381f83e810cd91980df Mon Sep 17 00:00:00 2001 From: Arie Matsliah Date: Mon, 1 Feb 2021 21:16:07 -0800 Subject: [PATCH 09/19] config --- sleap/nn/config/data.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/sleap/nn/config/data.py b/sleap/nn/config/data.py index 2c14cc645..bc091a547 100644 --- a/sleap/nn/config/data.py +++ b/sleap/nn/config/data.py @@ -76,6 +76,13 @@ class PreprocessingConfig: max stride (typically 32). This padding will be ignored when instance cropping inputs since the crop size should already be divisible by the model's max stride. + resize_and_pad_to_target: If True, will resize and pad all images in the dataset to match target dimensions. + This is useful when preprocessing datasets with mixed image dimensions (from different video resolutions). + Aspect ratio is preserved, and padding applied (if needed) to bottom or right of image only. + target_height: Target image height for 'resize_and_pad_to_target'. When not explicitly provided, inferred as the + max image height from the dataset. + target_width: Target image width for 'resize_and_pad_to_target'. When not explicitly provided, inferred as the + max image width from the dataset. """ ensure_rgb: bool = False @@ -88,6 +95,9 @@ class PreprocessingConfig: ) input_scaling: float = 1.0 pad_to_stride: Optional[int] = None + resize_and_pad_to_target: bool = True + target_height: Optional[int] = None + target_width: Optional[int] = None @attr.s(auto_attribs=True) From 58d6863a00ace5c0be858d9dd08b63bd995707eb Mon Sep 17 00:00:00 2001 From: Arie Matsliah Date: Mon, 1 Feb 2021 21:27:14 -0800 Subject: [PATCH 10/19] util --- sleap/nn/data/providers.py | 5 ++++- tests/nn/data/test_providers.py | 4 ++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/sleap/nn/data/providers.py b/sleap/nn/data/providers.py index dab6a1b17..971cbf51e 100644 --- a/sleap/nn/data/providers.py +++ b/sleap/nn/data/providers.py @@ -3,7 +3,7 @@ import numpy as np import tensorflow as tf import attr -from typing import Text, Optional, List, Sequence, Union +from typing import Text, Optional, List, Sequence, Union, Tuple import sleap @@ -93,6 +93,9 @@ def videos(self) -> List[sleap.Video]: """Return the list of videos that `video_ind` in examples match up with.""" return self.labels.videos + def max_video_height_and_width(self) -> Tuple[int, int]: + return max(video.shape[1] for video in self.videos), max(video.shape[2] for video in self.videos) + def make_dataset( self, ds_index: Optional[tf.data.Dataset] = None ) -> tf.data.Dataset: diff --git a/tests/nn/data/test_providers.py b/tests/nn/data/test_providers.py index 519759f39..0c8054be0 100644 --- a/tests/nn/data/test_providers.py +++ b/tests/nn/data/test_providers.py @@ -174,3 +174,7 @@ def test_labels_reader_multi_size(): # Check LabelReader can provide different shapes of individual samples assert next(ds_iter)["image"].shape == (320, 560, 1) assert next(ds_iter)["image"].shape == (512, 512, 1) + + h, w = labels_reader.max_video_height_and_width() + assert h == 512 + assert w == 560 From 5e9e6e69a892cafdbe2193bf3cb3a89e5cc6b544 Mon Sep 17 00:00:00 2001 From: Arie Matsliah Date: Mon, 1 Feb 2021 21:46:53 -0800 Subject: [PATCH 11/19] from config --- sleap/nn/data/providers.py | 2 +- sleap/nn/data/resizing.py | 62 +++++++++++++++++++++++++++++++++ tests/nn/data/test_providers.py | 2 +- 3 files changed, 64 insertions(+), 2 deletions(-) diff --git a/sleap/nn/data/providers.py b/sleap/nn/data/providers.py index 971cbf51e..eff107287 100644 --- a/sleap/nn/data/providers.py +++ b/sleap/nn/data/providers.py @@ -93,7 +93,7 @@ def videos(self) -> List[sleap.Video]: """Return the list of videos that `video_ind` in examples match up with.""" return self.labels.videos - def max_video_height_and_width(self) -> Tuple[int, int]: + def max_height_and_width(self) -> Tuple[int, int]: return max(video.shape[1] for video in self.videos), max(video.shape[2] for video in self.videos) def make_dataset( diff --git a/sleap/nn/data/resizing.py b/sleap/nn/data/resizing.py index 749c46856..0116edb7d 100644 --- a/sleap/nn/data/resizing.py +++ b/sleap/nn/data/resizing.py @@ -275,6 +275,68 @@ class SizeMatcher: max_image_height: int = None max_image_width: int = None + + @classmethod + def from_config( + cls, + config: PreprocessingConfig, + provider: Optional[Provider] = None, + update_config: bool = True, + image_key: Text = "image", + scale_key: Text = "scale", + keep_full_image: bool = False, + full_image_key: Text = "full_image", + points_key: Optional[Text] = "instances" + ) -> "SizeMatcher": + """Build an instance of this class from configuration. + + Args: + config: An `PreprocessingConfig` instance with the desired parameters. If + `config.resize_and_pad_to_target` is True and 'target_height' / 'target_width' are not set, provider + needs to be set that implements 'max_height_and_width'. + provider: Data provider. + update_config: If True, the input model configuration will be updated with + values inferred from other fields. + image_key: String name of the key containing the images to resize. + scale_key: String name of the key containing the scale of the images. + pad_to_stride: An integer specifying the `pad_to_stride` if + `config.pad_to_stride` is not an explicit integer (e.g., set to None). + keep_full_image: If True, keeps the (original size) full image in the + examples. This is useful for multi-scale inference. + full_image_key: String name of the key containing the full images. + points_key: String name of the key containing points to adjust for the + resizing operation. + Returns: + An instance of this class. + + Raises: + ValueError: If `provider` is not set or does not implement `max_height_and_width`. + """ + if config.resize_and_pad_to_target: + if config.target_height is not None and config.target_width is not None: + max_height = config.target_height + max_width = config.target_width + else: + try: + max_height, max_width = provider.max_height_and_width() + except: + raise ValueError("target_height / target_width could not be determined") + if update_config: + config.target_height = max_height + config.target_width = max_width + else: + max_height, max_width = None, None + + return cls( + image_key=image_key, + points_key=points_key, + scale_key=scale_key, + keep_full_image=keep_full_image, + full_image_key=full_image_key, + max_image_height=max_height, + max_image_width=max_width, + ) + @property def input_keys(self) -> List[Text]: """Return the keys that incoming elements are expected to have.""" diff --git a/tests/nn/data/test_providers.py b/tests/nn/data/test_providers.py index 0c8054be0..7f35005e4 100644 --- a/tests/nn/data/test_providers.py +++ b/tests/nn/data/test_providers.py @@ -175,6 +175,6 @@ def test_labels_reader_multi_size(): assert next(ds_iter)["image"].shape == (320, 560, 1) assert next(ds_iter)["image"].shape == (512, 512, 1) - h, w = labels_reader.max_video_height_and_width() + h, w = labels_reader.max_height_and_width() assert h == 512 assert w == 560 From e9355bb4b71f057753cce19dd002ec4121ecd155 Mon Sep 17 00:00:00 2001 From: Arie Matsliah Date: Tue, 2 Feb 2021 07:24:49 -0800 Subject: [PATCH 12/19] pipelines --- sleap/nn/data/pipelines.py | 43 +++++++++++++++++++++++++++++++++++++- 1 file changed, 42 insertions(+), 1 deletion(-) diff --git a/sleap/nn/data/pipelines.py b/sleap/nn/data/pipelines.py index b37caa714..21dd57bb7 100644 --- a/sleap/nn/data/pipelines.py +++ b/sleap/nn/data/pipelines.py @@ -22,7 +22,7 @@ RandomCropper, ) from sleap.nn.data.normalization import Normalizer -from sleap.nn.data.resizing import Resizer, PointsRescaler +from sleap.nn.data.resizing import Resizer, PointsRescaler, SizeMatcher from sleap.nn.data.instance_centroids import InstanceCentroidFinder from sleap.nn.data.instance_cropping import InstanceCropper, PredictedInstanceCropper from sleap.nn.data.confidence_maps import ( @@ -68,6 +68,7 @@ RandomCropper, Normalizer, Resizer, + SizeMatcher, InstanceCentroidFinder, InstanceCropper, MultiConfidenceMapGenerator, @@ -352,6 +353,11 @@ def make_base_pipeline(self, data_provider: Provider) -> Pipeline: pipeline = Pipeline(providers=data_provider) pipeline += Normalizer.from_config(self.data_config.preprocessing) pipeline += Resizer.from_config(self.data_config.preprocessing) + if self.data_config.preprocessing.resize_and_pad_to_target: + pipeline += SizeMatcher.from_config( + config=self.data_config.preprocessing, + provider=data_provider, + ) if self.optimization_config.augmentation_config.random_crop: pipeline += RandomCropper( crop_height=self.optimization_config.augmentation_config.random_crop_height, @@ -392,6 +398,11 @@ def make_training_pipeline(self, data_provider: Provider) -> Pipeline: ) pipeline += Normalizer.from_config(self.data_config.preprocessing) pipeline += Resizer.from_config(self.data_config.preprocessing) + if self.data_config.preprocessing.resize_and_pad_to_target: + pipeline += SizeMatcher.from_config( + config=self.data_config.preprocessing, + provider=data_provider, + ) pipeline += SingleInstanceConfidenceMapGenerator( sigma=self.single_instance_confmap_head.sigma, @@ -484,6 +495,11 @@ def make_base_pipeline(self, data_provider: Provider) -> Pipeline: pipeline = Pipeline(providers=data_provider) pipeline += Normalizer.from_config(self.data_config.preprocessing) pipeline += Resizer.from_config(self.data_config.preprocessing) + if self.data_config.preprocessing.resize_and_pad_to_target: + pipeline += SizeMatcher.from_config( + config=self.data_config.preprocessing, + provider=data_provider, + ) if self.optimization_config.augmentation_config.random_crop: pipeline += RandomCropper( crop_height=self.optimization_config.augmentation_config.random_crop_height, @@ -530,6 +546,11 @@ def make_training_pipeline(self, data_provider: Provider) -> Pipeline: ) pipeline += Normalizer.from_config(self.data_config.preprocessing) pipeline += Resizer.from_config(self.data_config.preprocessing) + if self.data_config.preprocessing.resize_and_pad_to_target: + pipeline += SizeMatcher.from_config( + config=self.data_config.preprocessing, + provider=data_provider, + ) pipeline += InstanceCentroidFinder.from_config( self.data_config.instance_cropping, @@ -638,6 +659,11 @@ def make_base_pipeline(self, data_provider: Provider) -> Pipeline: pipeline = Pipeline(providers=data_provider) pipeline += Normalizer.from_config(self.data_config.preprocessing) pipeline += Resizer.from_config(self.data_config.preprocessing) + if self.data_config.preprocessing.resize_and_pad_to_target: + pipeline += SizeMatcher.from_config( + config=self.data_config.preprocessing, + provider=data_provider, + ) pipeline += InstanceCentroidFinder.from_config( self.data_config.instance_cropping, skeletons=self.data_config.labels.skeletons, @@ -675,6 +701,11 @@ def make_training_pipeline(self, data_provider: Provider) -> Pipeline: ) pipeline += Normalizer.from_config(self.data_config.preprocessing) pipeline += Resizer.from_config(self.data_config.preprocessing) + if self.data_config.preprocessing.resize_and_pad_to_target: + pipeline += SizeMatcher.from_config( + config=self.data_config.preprocessing, + provider=data_provider, + ) pipeline += InstanceCentroidFinder.from_config( self.data_config.instance_cropping, @@ -769,6 +800,11 @@ def make_base_pipeline(self, data_provider: Provider) -> Pipeline: pipeline = Pipeline(providers=data_provider) pipeline += Normalizer.from_config(self.data_config.preprocessing) pipeline += Resizer.from_config(self.data_config.preprocessing) + if self.data_config.preprocessing.resize_and_pad_to_target: + pipeline += SizeMatcher.from_config( + config=self.data_config.preprocessing, + provider=data_provider, + ) if self.optimization_config.augmentation_config.random_crop: pipeline += RandomCropper( crop_height=self.optimization_config.augmentation_config.random_crop_height, @@ -810,6 +846,11 @@ def make_training_pipeline(self, data_provider: Provider) -> Pipeline: ) pipeline += Normalizer.from_config(self.data_config.preprocessing) pipeline += Resizer.from_config(self.data_config.preprocessing) + if self.data_config.preprocessing.resize_and_pad_to_target: + pipeline += SizeMatcher.from_config( + config=self.data_config.preprocessing, + provider=data_provider, + ) pipeline += MultiConfidenceMapGenerator( sigma=self.confmaps_head.sigma, From 7d35c91a859d64931664f27279ff5506c680e951 Mon Sep 17 00:00:00 2001 From: Arie Matsliah Date: Tue, 2 Feb 2021 07:31:30 -0800 Subject: [PATCH 13/19] remove type --- sleap/nn/data/resizing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sleap/nn/data/resizing.py b/sleap/nn/data/resizing.py index 0116edb7d..ec6efcd34 100644 --- a/sleap/nn/data/resizing.py +++ b/sleap/nn/data/resizing.py @@ -280,7 +280,7 @@ class SizeMatcher: def from_config( cls, config: PreprocessingConfig, - provider: Optional[Provider] = None, + provider: Optional = None, update_config: bool = True, image_key: Text = "image", scale_key: Text = "scale", From fecc75157ff94f9b1c47ae1c95ce6c201ea00307 Mon Sep 17 00:00:00 2001 From: Arie Matsliah Date: Tue, 2 Feb 2021 09:56:20 -0800 Subject: [PATCH 14/19] order --- sleap/nn/data/pipelines.py | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/sleap/nn/data/pipelines.py b/sleap/nn/data/pipelines.py index 21dd57bb7..d8ee2a5a5 100644 --- a/sleap/nn/data/pipelines.py +++ b/sleap/nn/data/pipelines.py @@ -352,12 +352,12 @@ def make_base_pipeline(self, data_provider: Provider) -> Pipeline: """ pipeline = Pipeline(providers=data_provider) pipeline += Normalizer.from_config(self.data_config.preprocessing) - pipeline += Resizer.from_config(self.data_config.preprocessing) if self.data_config.preprocessing.resize_and_pad_to_target: pipeline += SizeMatcher.from_config( config=self.data_config.preprocessing, provider=data_provider, ) + pipeline += Resizer.from_config(self.data_config.preprocessing) if self.optimization_config.augmentation_config.random_crop: pipeline += RandomCropper( crop_height=self.optimization_config.augmentation_config.random_crop_height, @@ -397,13 +397,12 @@ def make_training_pipeline(self, data_provider: Provider) -> Pipeline: crop_width=self.optimization_config.augmentation_config.random_crop_width, ) pipeline += Normalizer.from_config(self.data_config.preprocessing) - pipeline += Resizer.from_config(self.data_config.preprocessing) if self.data_config.preprocessing.resize_and_pad_to_target: pipeline += SizeMatcher.from_config( config=self.data_config.preprocessing, provider=data_provider, ) - + pipeline += Resizer.from_config(self.data_config.preprocessing) pipeline += SingleInstanceConfidenceMapGenerator( sigma=self.single_instance_confmap_head.sigma, output_stride=self.single_instance_confmap_head.output_stride, @@ -494,12 +493,12 @@ def make_base_pipeline(self, data_provider: Provider) -> Pipeline: """ pipeline = Pipeline(providers=data_provider) pipeline += Normalizer.from_config(self.data_config.preprocessing) - pipeline += Resizer.from_config(self.data_config.preprocessing) if self.data_config.preprocessing.resize_and_pad_to_target: pipeline += SizeMatcher.from_config( config=self.data_config.preprocessing, provider=data_provider, ) + pipeline += Resizer.from_config(self.data_config.preprocessing) if self.optimization_config.augmentation_config.random_crop: pipeline += RandomCropper( crop_height=self.optimization_config.augmentation_config.random_crop_height, @@ -545,13 +544,12 @@ def make_training_pipeline(self, data_provider: Provider) -> Pipeline: crop_width=self.optimization_config.augmentation_config.random_crop_width, ) pipeline += Normalizer.from_config(self.data_config.preprocessing) - pipeline += Resizer.from_config(self.data_config.preprocessing) if self.data_config.preprocessing.resize_and_pad_to_target: pipeline += SizeMatcher.from_config( config=self.data_config.preprocessing, provider=data_provider, ) - + pipeline += Resizer.from_config(self.data_config.preprocessing) pipeline += InstanceCentroidFinder.from_config( self.data_config.instance_cropping, skeletons=self.data_config.labels.skeletons, @@ -658,12 +656,12 @@ def make_base_pipeline(self, data_provider: Provider) -> Pipeline: """ pipeline = Pipeline(providers=data_provider) pipeline += Normalizer.from_config(self.data_config.preprocessing) - pipeline += Resizer.from_config(self.data_config.preprocessing) if self.data_config.preprocessing.resize_and_pad_to_target: pipeline += SizeMatcher.from_config( config=self.data_config.preprocessing, provider=data_provider, ) + pipeline += Resizer.from_config(self.data_config.preprocessing) pipeline += InstanceCentroidFinder.from_config( self.data_config.instance_cropping, skeletons=self.data_config.labels.skeletons, @@ -700,13 +698,12 @@ def make_training_pipeline(self, data_provider: Provider) -> Pipeline: self.optimization_config.augmentation_config ) pipeline += Normalizer.from_config(self.data_config.preprocessing) - pipeline += Resizer.from_config(self.data_config.preprocessing) if self.data_config.preprocessing.resize_and_pad_to_target: pipeline += SizeMatcher.from_config( config=self.data_config.preprocessing, provider=data_provider, ) - + pipeline += Resizer.from_config(self.data_config.preprocessing) pipeline += InstanceCentroidFinder.from_config( self.data_config.instance_cropping, skeletons=self.data_config.labels.skeletons, @@ -799,12 +796,12 @@ def make_base_pipeline(self, data_provider: Provider) -> Pipeline: """ pipeline = Pipeline(providers=data_provider) pipeline += Normalizer.from_config(self.data_config.preprocessing) - pipeline += Resizer.from_config(self.data_config.preprocessing) if self.data_config.preprocessing.resize_and_pad_to_target: pipeline += SizeMatcher.from_config( config=self.data_config.preprocessing, provider=data_provider, ) + pipeline += Resizer.from_config(self.data_config.preprocessing) if self.optimization_config.augmentation_config.random_crop: pipeline += RandomCropper( crop_height=self.optimization_config.augmentation_config.random_crop_height, @@ -845,13 +842,12 @@ def make_training_pipeline(self, data_provider: Provider) -> Pipeline: crop_width=aug_config.random_crop_width, ) pipeline += Normalizer.from_config(self.data_config.preprocessing) - pipeline += Resizer.from_config(self.data_config.preprocessing) if self.data_config.preprocessing.resize_and_pad_to_target: pipeline += SizeMatcher.from_config( config=self.data_config.preprocessing, provider=data_provider, ) - + pipeline += Resizer.from_config(self.data_config.preprocessing) pipeline += MultiConfidenceMapGenerator( sigma=self.confmaps_head.sigma, output_stride=self.confmaps_head.output_stride, From 60d269e2bf1ee0543c220f2dfb39054cc973cee3 Mon Sep 17 00:00:00 2001 From: Arie Matsliah Date: Tue, 2 Feb 2021 11:03:30 -0800 Subject: [PATCH 15/19] black --- sleap/nn/data/providers.py | 4 +++- sleap/nn/data/resizing.py | 35 ++++++++++++++++++++-------------- tests/nn/data/test_resizing.py | 1 + 3 files changed, 25 insertions(+), 15 deletions(-) diff --git a/sleap/nn/data/providers.py b/sleap/nn/data/providers.py index eff107287..4aa05638f 100644 --- a/sleap/nn/data/providers.py +++ b/sleap/nn/data/providers.py @@ -94,7 +94,9 @@ def videos(self) -> List[sleap.Video]: return self.labels.videos def max_height_and_width(self) -> Tuple[int, int]: - return max(video.shape[1] for video in self.videos), max(video.shape[2] for video in self.videos) + return max(video.shape[1] for video in self.videos), max( + video.shape[2] for video in self.videos + ) def make_dataset( self, ds_index: Optional[tf.data.Dataset] = None diff --git a/sleap/nn/data/resizing.py b/sleap/nn/data/resizing.py index ec6efcd34..cc6217b68 100644 --- a/sleap/nn/data/resizing.py +++ b/sleap/nn/data/resizing.py @@ -275,7 +275,6 @@ class SizeMatcher: max_image_height: int = None max_image_width: int = None - @classmethod def from_config( cls, @@ -286,7 +285,7 @@ def from_config( scale_key: Text = "scale", keep_full_image: bool = False, full_image_key: Text = "full_image", - points_key: Optional[Text] = "instances" + points_key: Optional[Text] = "instances", ) -> "SizeMatcher": """Build an instance of this class from configuration. @@ -320,7 +319,9 @@ def from_config( try: max_height, max_width = provider.max_height_and_width() except: - raise ValueError("target_height / target_width could not be determined") + raise ValueError( + "target_height / target_width could not be determined" + ) if update_config: config.target_height = max_height config.target_width = max_width @@ -378,25 +379,30 @@ def resize_and_pad(example): current_shape = tf.shape(image) # Only apply this transform if image shape differs from target - if current_shape[-3] != self.max_image_height or current_shape[-2] != self.max_image_width: + if ( + current_shape[-3] != self.max_image_height + or current_shape[-2] != self.max_image_width + ): # Calculate target height and width for resizing the image (no padding yet) hratio = self.max_image_height / tf.cast(current_shape[-3], tf.float32) wratio = self.max_image_width / tf.cast(current_shape[-2], tf.float32) if hratio > wratio: # The bottleneck is width, scale to fit width first then pad to height - target_height=tf.cast(tf.cast(current_shape[-3], tf.float32) * wratio, tf.int32) - target_width=self.max_image_width + target_height = tf.cast( + tf.cast(current_shape[-3], tf.float32) * wratio, tf.int32 + ) + target_width = self.max_image_width example[self.scale_key] = example[self.scale_key] * wratio else: # The bottleneck is height, scale to fit height first then pad to width - target_height=self.max_image_height - target_width=tf.cast(tf.cast(current_shape[-2], tf.float32) * hratio, tf.int32) + target_height = self.max_image_height + target_width = tf.cast( + tf.cast(current_shape[-2], tf.float32) * hratio, tf.int32 + ) example[self.scale_key] = example[self.scale_key] * hratio # Resize the image to fill one of the dimensions by preserving aspect ratio image = tf.image.resize_with_pad( - image, - target_height=target_height, - target_width=target_width + image, target_height=target_height, target_width=target_width ) # Pad the image on bottom/right with zeroes to match specified dimensions image = tf.image.pad_to_bounding_box( @@ -404,16 +410,17 @@ def resize_and_pad(example): offset_height=0, offset_width=0, target_height=self.max_image_height, - target_width=self.max_image_width + target_width=self.max_image_width, ) example[self.image_key] = tf.cast(image, example[self.image_key].dtype) # Scale the instance points accordingly if self.points_key: - example[self.points_key] = example[self.points_key] * example[self.scale_key] + example[self.points_key] = ( + example[self.points_key] * example[self.scale_key] + ) return example - ds_output = ds_input.map( resize_and_pad, num_parallel_calls=tf.data.experimental.AUTOTUNE ) diff --git a/tests/nn/data/test_resizing.py b/tests/nn/data/test_resizing.py index 5f5b01d20..66c31ab61 100644 --- a/tests/nn/data/test_resizing.py +++ b/tests/nn/data/test_resizing.py @@ -123,6 +123,7 @@ def test_resizer_from_config(): config=resizing.PreprocessingConfig(input_scaling=0.5, pad_to_stride=None) ) + def test_size_matcher(): # Create some fake data using two different size videos. skeleton = sleap.Skeleton.from_names_and_edge_inds(["A"]) From ec008f5eedbb5f6736fb3908104736485adc1038 Mon Sep 17 00:00:00 2001 From: Arie Matsliah Date: Wed, 3 Feb 2021 08:59:49 -0800 Subject: [PATCH 16/19] is multi size predicate --- sleap/nn/data/providers.py | 3 +++ tests/nn/data/test_providers.py | 6 ++++++ 2 files changed, 9 insertions(+) diff --git a/sleap/nn/data/providers.py b/sleap/nn/data/providers.py index 4aa05638f..fb0fa9ddf 100644 --- a/sleap/nn/data/providers.py +++ b/sleap/nn/data/providers.py @@ -98,6 +98,9 @@ def max_height_and_width(self) -> Tuple[int, int]: video.shape[2] for video in self.videos ) + def is_from_multi_size_videos(self) -> bool: + return len(set(v.shape[1] for v in self.videos)) > 1 or len(set(v.shape[2] for v in self.videos)) > 1 + def make_dataset( self, ds_index: Optional[tf.data.Dataset] = None ) -> tf.data.Dataset: diff --git a/tests/nn/data/test_providers.py b/tests/nn/data/test_providers.py index 7f35005e4..f24ec4875 100644 --- a/tests/nn/data/test_providers.py +++ b/tests/nn/data/test_providers.py @@ -11,6 +11,8 @@ def test_labels_reader(min_labels): labels_reader = providers.LabelsReader.from_user_instances(min_labels) ds = labels_reader.make_dataset() + assert not labels_reader.is_from_multi_size_videos() + example = next(iter(ds)) assert len(labels_reader) == 1 @@ -47,6 +49,8 @@ def test_labels_reader_no_visible_points(min_labels): labels_reader = providers.LabelsReader.from_user_instances(min_labels) ds = labels_reader.make_dataset() + assert not labels_reader.is_from_multi_size_videos() + example = next(iter(ds)) # There should be two instances in the labels dataset @@ -175,6 +179,8 @@ def test_labels_reader_multi_size(): assert next(ds_iter)["image"].shape == (320, 560, 1) assert next(ds_iter)["image"].shape == (512, 512, 1) + # Check util functions h, w = labels_reader.max_height_and_width() assert h == 512 assert w == 560 + assert labels_reader.is_from_multi_size_videos() From 8f65d8799ba18a740a99dccfceff5fc7289442aa Mon Sep 17 00:00:00 2001 From: Arie Matsliah Date: Wed, 3 Feb 2021 09:02:49 -0800 Subject: [PATCH 17/19] is multi size predicate --- sleap/nn/data/providers.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/sleap/nn/data/providers.py b/sleap/nn/data/providers.py index fb0fa9ddf..0fd2e6b76 100644 --- a/sleap/nn/data/providers.py +++ b/sleap/nn/data/providers.py @@ -175,7 +175,13 @@ def fetch_lf(ind): [ind], [image_dtype, tf.int32, tf.float32, tf.int32, tf.int64, tf.int32], ) - image = tf.ensure_shape(image, (None, None, image_num_channels)) + + # Ensure shape with constant or variable height/width, based on whether or not the videos have mixed sizes + if self.is_from_multi_size_videos(): + image = tf.ensure_shape(image, (None, None, image_num_channels)) + else: + image = tf.ensure_shape(image, first_image.shape) + instances = tf.ensure_shape(instances, tf.TensorShape([None, None, 2])) skeleton_inds = tf.ensure_shape(skeleton_inds, tf.TensorShape([None])) From efd4872ac60d762a92a9fd47afee57288721273e Mon Sep 17 00:00:00 2001 From: Arie Matsliah Date: Wed, 3 Feb 2021 09:06:45 -0800 Subject: [PATCH 18/19] black --- sleap/nn/data/providers.py | 5 ++++- tests/nn/data/test_resizing.py | 4 +++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/sleap/nn/data/providers.py b/sleap/nn/data/providers.py index 0fd2e6b76..9181f4a84 100644 --- a/sleap/nn/data/providers.py +++ b/sleap/nn/data/providers.py @@ -99,7 +99,10 @@ def max_height_and_width(self) -> Tuple[int, int]: ) def is_from_multi_size_videos(self) -> bool: - return len(set(v.shape[1] for v in self.videos)) > 1 or len(set(v.shape[2] for v in self.videos)) > 1 + return ( + len(set(v.shape[1] for v in self.videos)) > 1 + or len(set(v.shape[2] for v in self.videos)) > 1 + ) def make_dataset( self, ds_index: Optional[tf.data.Dataset] = None diff --git a/tests/nn/data/test_resizing.py b/tests/nn/data/test_resizing.py index 66c31ab61..891bbb189 100644 --- a/tests/nn/data/test_resizing.py +++ b/tests/nn/data/test_resizing.py @@ -6,7 +6,9 @@ use_cpu_only() # hide GPUs for test import sleap -from sleap.nn.system import use_cpu_only; use_cpu_only() # hide GPUs for test +from sleap.nn.system import use_cpu_only + +use_cpu_only() # hide GPUs for test from sleap.nn.data import resizing from sleap.nn.data import providers from sleap.nn.data.resizing import SizeMatcher From bba04010abcf4e8b19feb7915a270ecfae4a73ce Mon Sep 17 00:00:00 2001 From: Arie Matsliah Date: Wed, 3 Feb 2021 09:58:48 -0800 Subject: [PATCH 19/19] property --- sleap/nn/data/providers.py | 4 +++- sleap/nn/data/resizing.py | 2 +- tests/nn/data/test_providers.py | 8 ++++---- 3 files changed, 8 insertions(+), 6 deletions(-) diff --git a/sleap/nn/data/providers.py b/sleap/nn/data/providers.py index 9181f4a84..d2e04f2cb 100644 --- a/sleap/nn/data/providers.py +++ b/sleap/nn/data/providers.py @@ -93,11 +93,13 @@ def videos(self) -> List[sleap.Video]: """Return the list of videos that `video_ind` in examples match up with.""" return self.labels.videos + @property def max_height_and_width(self) -> Tuple[int, int]: return max(video.shape[1] for video in self.videos), max( video.shape[2] for video in self.videos ) + @property def is_from_multi_size_videos(self) -> bool: return ( len(set(v.shape[1] for v in self.videos)) > 1 @@ -180,7 +182,7 @@ def fetch_lf(ind): ) # Ensure shape with constant or variable height/width, based on whether or not the videos have mixed sizes - if self.is_from_multi_size_videos(): + if self.is_from_multi_size_videos: image = tf.ensure_shape(image, (None, None, image_num_channels)) else: image = tf.ensure_shape(image, first_image.shape) diff --git a/sleap/nn/data/resizing.py b/sleap/nn/data/resizing.py index cc6217b68..56a2c0315 100644 --- a/sleap/nn/data/resizing.py +++ b/sleap/nn/data/resizing.py @@ -317,7 +317,7 @@ def from_config( max_width = config.target_width else: try: - max_height, max_width = provider.max_height_and_width() + max_height, max_width = provider.max_height_and_width except: raise ValueError( "target_height / target_width could not be determined" diff --git a/tests/nn/data/test_providers.py b/tests/nn/data/test_providers.py index f24ec4875..7b23c5b39 100644 --- a/tests/nn/data/test_providers.py +++ b/tests/nn/data/test_providers.py @@ -11,7 +11,7 @@ def test_labels_reader(min_labels): labels_reader = providers.LabelsReader.from_user_instances(min_labels) ds = labels_reader.make_dataset() - assert not labels_reader.is_from_multi_size_videos() + assert not labels_reader.is_from_multi_size_videos example = next(iter(ds)) @@ -49,7 +49,7 @@ def test_labels_reader_no_visible_points(min_labels): labels_reader = providers.LabelsReader.from_user_instances(min_labels) ds = labels_reader.make_dataset() - assert not labels_reader.is_from_multi_size_videos() + assert not labels_reader.is_from_multi_size_videos example = next(iter(ds)) @@ -180,7 +180,7 @@ def test_labels_reader_multi_size(): assert next(ds_iter)["image"].shape == (512, 512, 1) # Check util functions - h, w = labels_reader.max_height_and_width() + h, w = labels_reader.max_height_and_width assert h == 512 assert w == 560 - assert labels_reader.is_from_multi_size_videos() + assert labels_reader.is_from_multi_size_videos