Skip to content

Commit

Permalink
Use beam.FlatMap in webvid instead of Map and then Filter on None
Browse files (browse the repository at this point in the history)
PiperOrigin-RevId: 525356429
  • Loading branch information
tomvdw authored and The TensorFlow Datasets Authors committed Apr 19, 2023
1 parent 4b994f4 commit 6762408
Showing 1 changed file with 5 additions and 9 deletions.
14 changes: 5 additions & 9 deletions tensorflow_datasets/datasets/webvid/webvid_dataset_builder.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -285,16 +285,16 @@ def _process_example(data_row):
logging.warning(
'Timed out while processing %s with exception: %s', file_path, e
)
return None
return
except Exception: # pylint: disable=broad-except
self._process_error_counter.inc()
logging.exception('Failed to process video %s.', file_path)
return None
return

if frames is None:
self._empty_video_counter.inc()
logging.warning('Empty video %s', file_path)
return None
return

self._frame_count_dist.update(len(frames))
logging.info(
Expand All @@ -316,7 +316,7 @@ def _process_example(data_row):

self._final_caption_len_dist.update(len(features['caption']))
self._success_counter.inc()
return new_video_id, features
yield new_video_id, features

# Get list of videos in file system.
files = epath.Path(image_base_path).glob(os.path.join('*', '*_*', '*.mp4'))
Expand All @@ -343,8 +343,4 @@ def path_to_id(path):
logging.info('Number of rows %s', df.shape)
df = df.to_dict('records')

return (
beam.Create(df)
| beam.Map(_process_example)
| beam.Filter(lambda x: x is not None)
)
return beam.Create(df) | beam.FlatMap(_process_example)

0 comments on commit 6762408

Please sign in to comment.