diff --git a/datumaro/datumaro/plugins/coco_format/converter.py b/datumaro/datumaro/plugins/coco_format/converter.py
index 7b91aef4f5d..42d5497c471 100644
--- a/datumaro/datumaro/plugins/coco_format/converter.py
+++ b/datumaro/datumaro/plugins/coco_format/converter.py
@@ -14,12 +14,13 @@
 
 from datumaro.components.converter import Converter
 from datumaro.components.extractor import (DEFAULT_SUBSET_NAME,
-    AnnotationType, Points, Mask
+    AnnotationType, Points
 )
 from datumaro.components.cli_plugin import CliPlugin
 from datumaro.util import find
 from datumaro.util.image import save_image
 import datumaro.util.mask_tools as mask_tools
+import datumaro.util.annotation_tools as anno_tools
 
 from .format import CocoTask, CocoPath
 
@@ -194,7 +195,7 @@ def crop_segments(cls, instances, img_width, img_height):
             if inst[1]:
                 inst[1] = sum(new_segments, [])
             else:
-                mask = cls.merge_masks(new_segments)
+                mask = mask_tools.merge_masks(new_segments)
                 inst[2] = mask_tools.mask_to_rle(mask)
 
         return instances
@@ -205,8 +206,8 @@ def find_instance_parts(self, group, img_width, img_height):
         masks = [a for a in group if a.type == AnnotationType.mask]
 
         anns = boxes + polygons + masks
-        leader = self.find_group_leader(anns)
-        bbox = self.compute_bbox(anns)
+        leader = anno_tools.find_group_leader(anns)
+        bbox = anno_tools.compute_bbox(anns)
         mask = None
         polygons = [p.points for p in polygons]
 
@@ -228,68 +229,34 @@ def find_instance_parts(self, group, img_width, img_height):
             if masks:
+                # merge_masks() takes plain mask images: unwrap the annotations
+                # first, because a raw mask array can be appended below
+                masks = [m.image for m in masks]
                 if mask is not None:
                     masks += [mask]
-                mask = self.merge_masks(masks)
+                mask = mask_tools.merge_masks(masks)
 
             if mask is not None:
                 mask = mask_tools.mask_to_rle(mask)
             polygons = []
         else:
             if masks:
-                mask = self.merge_masks(masks)
+                mask = mask_tools.merge_masks([m.image for m in masks])
                 polygons += mask_tools.mask_to_polygons(mask)
             mask = None
 
         return [leader, polygons, mask, bbox]
 
-    @staticmethod
-    def find_group_leader(group):
-        return max(group, key=lambda x: x.get_area())
-
-    @staticmethod
-    def merge_masks(masks):
-        if not masks:
-            return None
-
-        def get_mask(m):
-            if isinstance(m, Mask):
-                return m.image
-            else:
-                return m
-
-        binary_mask = get_mask(masks[0])
-        for m in masks[1:]:
-            binary_mask |= get_mask(m)
-
-        return binary_mask
-
-    @staticmethod
-    def compute_bbox(annotations):
-        boxes = [ann.get_bbox() for ann in annotations]
-        x0 = min((b[0] for b in boxes), default=0)
-        y0 = min((b[1] for b in boxes), default=0)
-        x1 = max((b[0] + b[2] for b in boxes), default=0)
-        y1 = max((b[1] + b[3] for b in boxes), default=0)
-        return [x0, y0, x1 - x0, y1 - y0]
-
     @staticmethod
     def find_instance_anns(annotations):
+        # Only boxes, polygons and masks can be parts of an instance
         return [a for a in annotations
-            if a.type in { AnnotationType.bbox, AnnotationType.polygon } or \
-                a.type == AnnotationType.mask and a.label is not None
+            if a.type in { AnnotationType.bbox,
+                AnnotationType.polygon, AnnotationType.mask }
         ]
 
     @classmethod
     def find_instances(cls, annotations):
-        instance_anns = cls.find_instance_anns(annotations)
-
-        ann_groups = []
-        for g_id, group in groupby(instance_anns, lambda a: a.group):
-            if not g_id:
-                ann_groups.extend(([a] for a in group))
-            else:
-                ann_groups.append(list(group))
-
-        return ann_groups
+        # Splits the instance annotations into groups, one per instance
+        return anno_tools.find_instances(cls.find_instance_anns(annotations))
 
     def save_annotations(self, item):
         instances = self.find_instances(item.annotations)
diff --git a/datumaro/datumaro/plugins/tf_detection_api_format/converter.py b/datumaro/datumaro/plugins/tf_detection_api_format/converter.py
index 340492638fe..2a32d4f151a 100644
--- a/datumaro/datumaro/plugins/tf_detection_api_format/converter.py
+++ b/datumaro/datumaro/plugins/tf_detection_api_format/converter.py
@@ -16,98 +16,34 @@
 from datumaro.components.converter import Converter
 from datumaro.components.cli_plugin import CliPlugin
 from datumaro.util.image import encode_image
+from datumaro.util.mask_tools import merge_masks
+from datumaro.util.annotation_tools import (compute_bbox,
+    find_group_leader, find_instances)
 from datumaro.util.tf_util import import_tf as _import_tf
 
 from .format import DetectionApiPath
 tf = _import_tf()
 
 
-# we need it to filter out non-ASCII characters, otherwise training will crash
+# filter out non-ASCII characters, otherwise training will crash
 _printable = set(string.printable)
 def _make_printable(s):
     return ''.join(filter(lambda x: x in _printable, s))
 
-def _make_tf_example(item, get_label_id, get_label, save_images=False):
-    def int64_feature(value):
-        return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))
-
-    def int64_list_feature(value):
-        return tf.train.Feature(int64_list=tf.train.Int64List(value=value))
-
-    def bytes_feature(value):
-        return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))
-
-    def bytes_list_feature(value):
-        return tf.train.Feature(bytes_list=tf.train.BytesList(value=value))
-
-    def float_list_feature(value):
-        return tf.train.Feature(float_list=tf.train.FloatList(value=value))
-
-
-    features = {
-        'image/source_id': bytes_feature(str(item.id).encode('utf-8')),
-        'image/filename': bytes_feature(
-            ('%s%s' % (item.id, DetectionApiPath.IMAGE_EXT)).encode('utf-8')),
-    }
-
-    if not item.has_image:
-        raise Exception("Failed to export dataset item '%s': "
-            "item has no image info" % item.id)
-    height, width = item.image.size
-
-    features.update({
-        'image/height': int64_feature(height),
-        'image/width': int64_feature(width),
-    })
-
-    features.update({
-        'image/encoded': bytes_feature(b''),
-        'image/format': bytes_feature(b'')
-    })
-    if save_images:
-        if item.has_image and item.image.has_data:
-            fmt = DetectionApiPath.IMAGE_FORMAT
-            buffer = encode_image(item.image.data, DetectionApiPath.IMAGE_EXT)
-
-            features.update({
-                'image/encoded': bytes_feature(buffer),
-                'image/format': bytes_feature(fmt.encode('utf-8')),
-            })
-        else:
-            log.warning("Item '%s' has no image" % item.id)
-
-    xmins = [] # List of normalized left x coordinates in bounding box (1 per box)
-    xmaxs = [] # List of normalized right x coordinates in bounding box (1 per box)
-    ymins = [] # List of normalized top y coordinates in bounding box (1 per box)
-    ymaxs = [] # List of normalized bottom y coordinates in bounding box (1 per box)
-    classes_text = [] # List of string class name of bounding box (1 per box)
-    classes = [] # List of integer class id of bounding box (1 per box)
-
-    boxes = [ann for ann in item.annotations if ann.type is AnnotationType.bbox]
-    for box in boxes:
-        box_label = _make_printable(get_label(box.label))
-
-        xmins.append(box.points[0] / width)
-        xmaxs.append(box.points[2] / width)
-        ymins.append(box.points[1] / height)
-        ymaxs.append(box.points[3] / height)
-        classes_text.append(box_label.encode('utf-8'))
-        classes.append(get_label_id(box.label))
-
-    if boxes:
-        features.update({
-            'image/object/bbox/xmin': float_list_feature(xmins),
-            'image/object/bbox/xmax': float_list_feature(xmaxs),
-            'image/object/bbox/ymin': float_list_feature(ymins),
-            'image/object/bbox/ymax': float_list_feature(ymaxs),
-            'image/object/class/text': bytes_list_feature(classes_text),
-            'image/object/class/label': int64_list_feature(classes),
-        })
+def int64_feature(value): # a single value -> Int64List feature
+    return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))
+
+def int64_list_feature(value): # a sequence of values -> Int64List feature
+    return tf.train.Feature(int64_list=tf.train.Int64List(value=value))
 
-    tf_example = tf.train.Example(
-        features=tf.train.Features(feature=features))
+def bytes_feature(value): # a single value -> BytesList feature
+    return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))
 
-    return tf_example
+def bytes_list_feature(value): # a sequence of values -> BytesList feature
+    return tf.train.Feature(bytes_list=tf.train.BytesList(value=value))
+
+def float_list_feature(value): # a sequence of values -> FloatList feature
+    return tf.train.Feature(float_list=tf.train.FloatList(value=value))
 
 class TfDetectionApiConverter(Converter, CliPlugin):
     @classmethod
@@ -115,16 +51,29 @@ def build_cmdline_parser(cls, **kwargs):
         parser = super().build_cmdline_parser(**kwargs)
         parser.add_argument('--save-images', action='store_true',
             help="Save images (default: %(default)s)")
+        parser.add_argument('--save-masks', action='store_true',
+            help="Include instance masks (default: %(default)s)")
         return parser
 
-    def __init__(self, save_images=False):
+    def __init__(self, save_images=False, save_masks=False):
         super().__init__()
 
         self._save_images = save_images
+        self._save_masks = save_masks # add PNG-encoded instance masks to records
 
     def __call__(self, extractor, save_dir):
         os.makedirs(save_dir, exist_ok=True)
 
+        label_categories = extractor.categories().get(AnnotationType.label,
+            LabelCategories())
+        get_label = lambda label_id: label_categories.items[label_id].name \
+            if label_id is not None else ''
+        label_ids = OrderedDict((label.name, 1 + idx)
+            for idx, label in enumerate(label_categories.items))
+        map_label_id = lambda label_id: label_ids.get(get_label(label_id), 0)
+        self._get_label = get_label
+        self._get_label_id = map_label_id
+
         subsets = extractor.subsets()
         if len(subsets) == 0:
             subsets = [ None ]
@@ -136,14 +85,6 @@ def __call__(self, extractor, save_dir):
                 subset_name = DEFAULT_SUBSET_NAME
                 subset = extractor
 
-            label_categories = subset.categories().get(AnnotationType.label,
-                LabelCategories())
-            get_label = lambda label_id: label_categories.items[label_id].name \
-                if label_id is not None else ''
-            label_ids = OrderedDict((label.name, 1 + idx)
-                for idx, label in enumerate(label_categories.items))
-            map_label_id = lambda label_id: label_ids.get(get_label(label_id), 0)
-
             labelmap_path = osp.join(save_dir, DetectionApiPath.LABELMAP_FILE)
             with codecs.open(labelmap_path, 'w', encoding='utf8') as f:
                 for label, idx in label_ids.items():
@@ -157,10 +98,128 @@ def __call__(self, extractor, save_dir):
             anno_path = osp.join(save_dir, '%s.tfrecord' % (subset_name))
             with tf.io.TFRecordWriter(anno_path) as writer:
                 for item in subset:
-                    tf_example = _make_tf_example(
-                        item,
-                        get_label=get_label,
-                        get_label_id=map_label_id,
-                        save_images=self._save_images,
-                    )
+                    tf_example = self._make_tf_example(item)
                     writer.write(tf_example.SerializeToString())
+
+    @staticmethod
+    def _find_instances(annotations):
+        """Groups the bbox and mask annotations into instances."""
+        return find_instances(a for a in annotations
+            if a.type in { AnnotationType.bbox, AnnotationType.mask })
+
+    def _find_instance_parts(self, group, img_width, img_height):
+        """
+        Describes an instance (a group of boxes and masks) as
+        [leader, mask, bbox]: the annotation with the largest area, the
+        merged image of the group masks (left as None when masks are not
+        saved) and the bounding box enclosing all the annotations.
+        The image size arguments are not used.
+        """
+        boxes = [a for a in group if a.type == AnnotationType.bbox]
+        masks = [a for a in group if a.type == AnnotationType.mask]
+
+        anns = boxes + masks
+        leader = find_group_leader(anns)
+        bbox = compute_bbox(anns)
+
+        mask = None
+        if self._save_masks:
+            mask = merge_masks([m.image for m in masks])
+
+        return [leader, mask, bbox]
+
+    def _export_instances(self, instances, width, height):
+        """
+        Builds the 'image/object/*' features from [leader, mask, bbox]
+        instances. Box coordinates are divided by the image width and
+        height, masks are PNG-encoded (when masks are saved).
+
+        Returns: a dict of features, empty if there are no instances
+        """
+        xmins = [] # List of normalized left x coordinates of bounding boxes (1 per instance)
+        xmaxs = [] # List of normalized right x coordinates of bounding boxes (1 per instance)
+        ymins = [] # List of normalized top y coordinates of bounding boxes (1 per instance)
+        ymaxs = [] # List of normalized bottom y coordinates of bounding boxes (1 per instance)
+        classes_text = [] # List of class names of the instance leaders (1 per instance)
+        classes = [] # List of class ids of the instance leaders (1 per instance)
+        masks = [] # List of PNG-encoded masks, b'' when there is no mask (1 per instance)
+
+        for leader, mask, box in instances:
+            label = _make_printable(self._get_label(leader.label))
+            classes_text.append(label.encode('utf-8'))
+            classes.append(self._get_label_id(leader.label))
+
+            xmins.append(box[0] / width)
+            xmaxs.append((box[0] + box[2]) / width)
+            ymins.append(box[1] / height)
+            ymaxs.append((box[1] + box[3]) / height)
+
+            if self._save_masks:
+                if mask is not None:
+                    mask = encode_image(mask, '.png')
+                else:
+                    mask = b''
+                masks.append(mask)
+
+        result = {}
+        if classes:
+            result = {
+                'image/object/bbox/xmin': float_list_feature(xmins),
+                'image/object/bbox/xmax': float_list_feature(xmaxs),
+                'image/object/bbox/ymin': float_list_feature(ymins),
+                'image/object/bbox/ymax': float_list_feature(ymaxs),
+                'image/object/class/text': bytes_list_feature(classes_text),
+                'image/object/class/label': int64_list_feature(classes),
+            }
+            if masks:
+                result['image/object/mask'] = bytes_list_feature(masks)
+        return result
+
+    def _make_tf_example(self, item):
+        """
+        Builds a tf.train.Example for a dataset item: ids, image size,
+        image data (when images are saved and available) and instances.
+
+        Raises: Exception if the item has no image info
+        """
+        features = {
+            'image/source_id': bytes_feature(str(item.id).encode('utf-8')),
+            'image/filename': bytes_feature(
+                ('%s%s' % (item.id, DetectionApiPath.IMAGE_EXT)).encode('utf-8')),
+        }
+
+        if not item.has_image:
+            raise Exception("Failed to export dataset item '%s': "
+                "item has no image info" % item.id)
+        height, width = item.image.size
+
+        features.update({
+            'image/height': int64_feature(height),
+            'image/width': int64_feature(width),
+        })
+
+        # empty placeholders, replaced below when the image data is saved
+        features.update({
+            'image/encoded': bytes_feature(b''),
+            'image/format': bytes_feature(b'')
+        })
+        if self._save_images:
+            if item.has_image and item.image.has_data:
+                fmt = DetectionApiPath.IMAGE_FORMAT
+                buffer = encode_image(item.image.data, DetectionApiPath.IMAGE_EXT)
+
+                features.update({
+                    'image/encoded': bytes_feature(buffer),
+                    'image/format': bytes_feature(fmt.encode('utf-8')),
+                })
+            else:
+                log.warning("Item '%s' has no image" % item.id)
+
+        instances = self._find_instances(item.annotations)
+        instances = [self._find_instance_parts(i, width, height) for i in instances]
+        features.update(self._export_instances(instances, width, height))
+
+        tf_example = tf.train.Example(
+            features=tf.train.Features(feature=features))
+
+        return tf_example
diff --git a/datumaro/datumaro/plugins/tf_detection_api_format/extractor.py b/datumaro/datumaro/plugins/tf_detection_api_format/extractor.py
index 8974c65d805..eebff4a19dc 100644
--- a/datumaro/datumaro/plugins/tf_detection_api_format/extractor.py
+++ b/datumaro/datumaro/plugins/tf_detection_api_format/extractor.py
@@ -10,7 +10,7 @@
 
 from datumaro.components.extractor import (SourceExtractor,
     DEFAULT_SUBSET_NAME, DatasetItem,
-    AnnotationType, Bbox, LabelCategories
+    AnnotationType, Bbox, Mask, LabelCategories
 )
 from datumaro.util.image import Image, decode_image, lazy_image
 from datumaro.util.tf_util import import_tf as _import_tf
@@ -147,6 +147,8 @@ def _parse_tfrecord_file(cls, filepath, subset_name, images_dir):
             labels = tf.sparse.to_dense(
                 parsed_record['image/object/class/text'],
                 default_value=b'').numpy()
+            masks = tf.sparse.to_dense(
+                parsed_record['image/object/mask']).numpy()
 
             for label, label_id in zip(labels, label_ids):
                 label = label.decode('utf-8')
@@ -163,15 +165,38 @@ def _parse_tfrecord_file(cls, filepath, subset_name, images_dir):
                 item_id = osp.splitext(frame_filename)[0]
 
             annotations = []
-            for shape in np.dstack((labels, xmins, ymins, xmaxs, ymaxs))[0]:
+            for shape_id, shape in enumerate(
+                    np.dstack((labels, xmins, ymins, xmaxs, ymaxs))[0]):
                 label = shape[0].decode('utf-8')
-                x = clamp(shape[1] * frame_width, 0, frame_width)
-                y = clamp(shape[2] * frame_height, 0, frame_height)
-                w = clamp(shape[3] * frame_width, 0, frame_width) - x
-                h = clamp(shape[4] * frame_height, 0, frame_height) - y
-                annotations.append(Bbox(x, y, w, h,
-                    label=dataset_labels.get(label)
-                ))
+
+                # the converter writes b'' for an instance without a mask
+                mask = None
+                if len(masks) != 0 and len(masks[shape_id]) != 0:
+                    mask = masks[shape_id]
+                if mask is not None:
+                    if isinstance(mask, bytes):
+                        mask = lazy_image(mask, decode_image)
+                    annotations.append(Mask(image=mask,
+                        label=dataset_labels.get(label)
+                    ))
+                else:
+                    x = clamp(shape[1] * frame_width, 0, frame_width)
+                    y = clamp(shape[2] * frame_height, 0, frame_height)
+                    w = clamp(shape[3] * frame_width, 0, frame_width) - x
+                    h = clamp(shape[4] * frame_height, 0, frame_height) - y
+                    annotations.append(Bbox(x, y, w, h,
+                        label=dataset_labels.get(label)
+                    ))
+
+            image_size = None
+            if frame_height and frame_width:
+                image_size = (frame_height, frame_width)
+
+            image_params = {}
+            if frame_image and frame_format:
+                image_params['data'] = lazy_image(frame_image, decode_image)
+            if frame_filename and images_dir:
+                image_params['path'] = osp.join(images_dir, frame_filename)
 
             image_size = None
             if frame_height and frame_width:
diff --git a/datumaro/datumaro/plugins/transforms.py b/datumaro/datumaro/plugins/transforms.py
index 81c5ff50187..693edbc339c 100644
--- a/datumaro/datumaro/plugins/transforms.py
+++ b/datumaro/datumaro/plugins/transforms.py
@@ -3,16 +3,16 @@
 #
 # SPDX-License-Identifier: MIT
 
-from itertools import groupby
 import logging as log
 import os.path as osp
 
 import pycocotools.mask as mask_utils
 
 from datumaro.components.extractor import (Transform, AnnotationType,
-    Mask, RleMask, Polygon, Bbox)
+    RleMask, Polygon, Bbox)
 from datumaro.components.cli_plugin import CliPlugin
 import datumaro.util.mask_tools as mask_tools
+from datumaro.util.annotation_tools import find_group_leader, find_instances
 
 
 class CropCoveredSegments(Transform, CliPlugin):
@@ -125,7 +125,7 @@ def merge_segments(cls, instance, img_width, img_height,
         if not polygons and not masks:
             return []
 
-        leader = cls.find_group_leader(polygons + masks)
+        leader = find_group_leader(polygons + masks)
         instance = []
 
         # Build the resulting mask
@@ -138,9 +138,10 @@ def merge_segments(cls, instance, img_width, img_height,
             instance += polygons # keep unused polygons
 
         if masks:
+            masks = [m.image for m in masks]
             if mask is not None:
                 masks += [mask]
-            mask = cls.merge_masks(masks)
+            mask = mask_tools.merge_masks(masks)
 
         if mask is None:
             return instance
@@ -154,41 +155,10 @@ def merge_segments(cls, instance, img_width, img_height,
         )
         return instance
 
-    @staticmethod
-    def find_group_leader(group):
-        return max(group, key=lambda x: x.get_area())
-
-    @staticmethod
-    def merge_masks(masks):
-        if not masks:
-            return None
-
-        def get_mask(m):
-            if isinstance(m, Mask):
-                return m.image
-            else:
-                return m
-
-        binary_mask = get_mask(masks[0])
-        for m in masks[1:]:
-            binary_mask |= get_mask(m)
-
-        return binary_mask
-
     @staticmethod
     def find_instances(annotations):
-        segment_anns = (a for a in annotations
-            if a.type in {AnnotationType.polygon, AnnotationType.mask}
-        )
-
-        ann_groups = []
-        for g_id, group in groupby(segment_anns, lambda a: a.group):
-            if g_id is None:
-                ann_groups.extend(([a] for a in group))
-            else:
-                ann_groups.append(list(group))
-
-        return ann_groups
+        segment_types = {AnnotationType.polygon, AnnotationType.mask}
+        return find_instances(a for a in annotations if a.type in segment_types)
 
 class PolygonsToMasks(Transform, CliPlugin):
     def transform_item(self, item):
diff --git a/datumaro/datumaro/util/annotation_tools.py b/datumaro/datumaro/util/annotation_tools.py
new file mode 100644
index 00000000000..00871b157ec
--- /dev/null
+++ b/datumaro/datumaro/util/annotation_tools.py
@@ -0,0 +1,53 @@
+
+# Copyright (C) 2020 Intel Corporation
+#
+# SPDX-License-Identifier: MIT
+
+from itertools import groupby
+
+
+def find_instances(instance_anns):
+    """
+    Splits annotations into instances using their 'group' attribute.
+
+    An annotation with a falsy group (None or 0) forms an instance of
+    its own. Annotations with the same non-empty group are put into one
+    instance, even if other annotations stand between them in the input.
+
+    Returns: a list of instances, each one is a list of annotations
+    """
+    ann_groups = []
+    groups_by_id = {} # group id -> the instance list stored in ann_groups
+    for g_id, group in groupby(instance_anns, lambda a: a.group):
+        if not g_id:
+            ann_groups.extend(([a] for a in group))
+        elif g_id in groups_by_id:
+            # groupby() joins only adjacent items, so a group interleaved
+            # with other annotations comes in several runs - merge them
+            groups_by_id[g_id].extend(group)
+        else:
+            groups_by_id[g_id] = list(group)
+            ann_groups.append(groups_by_id[g_id])
+
+    return ann_groups
+
+def find_group_leader(group):
+    """
+    Returns the annotation with the largest get_area() value.
+    Raises ValueError if the group is empty.
+    """
+    return max(group, key=lambda x: x.get_area())
+
+def compute_bbox(annotations):
+    """
+    Computes the box enclosing the get_bbox() results of the annotations,
+    which are read as [x, y, w, h].
+
+    Returns: [x, y, w, h], or [0, 0, 0, 0] if there are no annotations
+    """
+    boxes = [ann.get_bbox() for ann in annotations]
+    x0 = min((b[0] for b in boxes), default=0)
+    y0 = min((b[1] for b in boxes), default=0)
+    x1 = max((b[0] + b[2] for b in boxes), default=0)
+    y1 = max((b[1] + b[3] for b in boxes), default=0)
+    return [x0, y0, x1 - x0, y1 - y0]
diff --git a/datumaro/datumaro/util/image.py b/datumaro/datumaro/util/image.py
index 712a4f789ea..2d465f71a4c 100644
--- a/datumaro/datumaro/util/image.py
+++ b/datumaro/datumaro/util/image.py
@@ -169,8 +169,6 @@ def __init__(self, data=None, path=None, loader=None, cache=None,
         if size is not None:
             assert len(size) == 2 and 0 < size[0] and 0 < size[1], size
             size = tuple(size)
-        else:
-            size = None
         self._size = size # (H, W)
 
         assert path is None or isinstance(path, str)
diff --git a/datumaro/tests/test_tfrecord_format.py b/datumaro/tests/test_tfrecord_format.py
index efbef0fd2b8..0bd29ae4179 100644
--- a/datumaro/tests/test_tfrecord_format.py
+++ b/datumaro/tests/test_tfrecord_format.py
@@ -3,7 +3,7 @@
 from unittest import TestCase
 
 from datumaro.components.extractor import (Extractor, DatasetItem,
-    AnnotationType, Bbox, LabelCategories
+    AnnotationType, Bbox, Mask, LabelCategories
 )
 from datumaro.plugins.tf_detection_api_format.importer import TfDetectionApiImporter
 from datumaro.plugins.tf_detection_api_format.extractor import TfDetectionApiExtractor
@@ -65,6 +65,35 @@ def categories(self):
                 TestExtractor(), TfDetectionApiConverter(save_images=True),
                 test_dir)
 
+    def test_can_save_masks(self): # runs _test_save_and_load with save_masks=True
+        class TestExtractor(Extractor):
+            def __iter__(self):
+                return iter([
+                    DatasetItem(id=1, subset='train', image=np.ones((4, 5, 3)),
+                        annotations=[
+                            Mask(image=np.array([ # NOTE(review): 4x4 mask, 4x5 image - confirm
+                                [1, 0, 0, 1],
+                                [0, 1, 1, 0],
+                                [0, 1, 1, 0],
+                                [1, 0, 0, 1],
+                            ]), label=1),
+                        ]
+                    ),
+                ])
+
+            def categories(self):
+                label_cat = LabelCategories()
+                for label in range(10): # 10 labels named 'label_0' .. 'label_9'
+                    label_cat.add('label_' + str(label))
+                return {
+                    AnnotationType.label: label_cat,
+                }
+
+        with TestDir() as test_dir:
+            self._test_save_and_load(
+                TestExtractor(), TfDetectionApiConverter(save_masks=True),
+                test_dir)
+
     def test_can_save_dataset_with_no_subsets(self):
         class TestExtractor(Extractor):
             def __iter__(self):
diff --git a/datumaro/tests/test_transforms.py b/datumaro/tests/test_transforms.py
index 11e997b19d7..6260fe517fd 100644
--- a/datumaro/tests/test_transforms.py
+++ b/datumaro/tests/test_transforms.py
@@ -159,8 +159,10 @@ def __iter__(self):
                                     [1, 0, 0, 0, 0],
                                     [1, 1, 1, 0, 0]],
                                 ),
-                                z_order=0),
+                                z_order=0, group=1),
                             Polygon([1, 1, 4, 1, 4, 4, 1, 4],
+                                z_order=1, group=1),
+                            Polygon([0, 0, 0, 2, 2, 2, 2, 0],
                                 z_order=1),
                         ]
                     ),
@@ -178,7 +180,15 @@ def __iter__(self):
                                     [1, 1, 1, 1, 0],
                                     [1, 1, 1, 0, 0]],
                                 ),
-                                z_order=0),
+                                z_order=0, group=1),
+                            Mask(np.array([
+                                    [1, 1, 0, 0, 0],
+                                    [1, 1, 0, 0, 0],
+                                    [0, 0, 0, 0, 0],
+                                    [0, 0, 0, 0, 0],
+                                    [0, 0, 0, 0, 0]],
+                                ),
+                                z_order=1),
                         ]
                     ),
                 ])