From 6762408316d2968c911dcf7b194ab500c16b9d43 Mon Sep 17 00:00:00 2001 From: Tom van der Weide Date: Tue, 18 Apr 2023 23:16:25 -0700 Subject: [PATCH] Use beam.FlatMap in webvid instead of Map and then Filter on None PiperOrigin-RevId: 525356429 --- .../datasets/webvid/webvid_dataset_builder.py | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/tensorflow_datasets/datasets/webvid/webvid_dataset_builder.py b/tensorflow_datasets/datasets/webvid/webvid_dataset_builder.py index 4c73e923cf1..e8dfcfce2b6 100644 --- a/tensorflow_datasets/datasets/webvid/webvid_dataset_builder.py +++ b/tensorflow_datasets/datasets/webvid/webvid_dataset_builder.py @@ -285,16 +285,16 @@ def _process_example(data_row): logging.warning( 'Timed out while processing %s with exception: %s', file_path, e ) - return None + return except Exception: # pylint: disable=broad-except self._process_error_counter.inc() logging.exception('Failed to process video %s.', file_path) - return None + return if frames is None: self._empty_video_counter.inc() logging.warning('Empty video %s', file_path) - return None + return self._frame_count_dist.update(len(frames)) logging.info( @@ -316,7 +316,7 @@ def _process_example(data_row): self._final_caption_len_dist.update(len(features['caption'])) self._success_counter.inc() - return new_video_id, features + yield new_video_id, features # Get list of videos in file system. files = epath.Path(image_base_path).glob(os.path.join('*', '*_*', '*.mp4')) @@ -343,8 +343,4 @@ def path_to_id(path): logging.info('Number of rows %s', df.shape) df = df.to_dict('records') - return ( - beam.Create(df) - | beam.Map(_process_example) - | beam.Filter(lambda x: x is not None) - ) + return beam.Create(df) | beam.FlatMap(_process_example)