Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allow video with disabled decoding without decord #7262

Merged
merged 1 commit into from
Oct 29, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
58 changes: 23 additions & 35 deletions src/datasets/features/video.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,15 +82,10 @@ def encode_example(self, value: Union[str, bytes, dict, np.ndarray, "VideoReader
`dict` with "path" and "bytes" fields
"""
if config.DECORD_AVAILABLE:
# We need to import torch first, otherwise later it can cause issues
# e.g. "RuntimeError: random_device could not be read"
# when running `torch.tensor(value).share_memory_()`
if config.TORCH_AVAILABLE:
import torch # noqa
from decord import VideoReader

else:
raise ImportError("To support encoding videos, please install 'decord'.")
VideoReader = None

if isinstance(value, list):
value = np.array(value)
Expand All @@ -102,7 +97,7 @@ def encode_example(self, value: Union[str, bytes, dict, np.ndarray, "VideoReader
elif isinstance(value, np.ndarray):
# convert the video array to bytes
return encode_np_array(value)
elif isinstance(value, VideoReader):
elif VideoReader and isinstance(value, VideoReader):
# convert the decord video reader to bytes
return encode_decord_video(value)
elif value.get("path") is not None and os.path.isfile(value["path"]):
Expand Down Expand Up @@ -138,12 +133,8 @@ def decode_example(self, value: dict, token_per_repo_id=None) -> "VideoReader":
raise RuntimeError("Decoding is disabled for this feature. Please use Video(decode=True) instead.")

if config.DECORD_AVAILABLE:
# We need to import torch first, otherwise later it can cause issues
# e.g. "RuntimeError: random_device could not be read"
# when running `torch.tensor(value).share_memory_()`
if config.TORCH_AVAILABLE:
import torch # noqa
from decord import VideoReader

else:
raise ImportError("To support decoding videos, please install 'decord'.")

Expand Down Expand Up @@ -283,26 +274,23 @@ def _patched_get_batch(self: "VideoReader", *args, **kwargs):


def patch_decord():
if config.DECORD_AVAILABLE:
# We need to import torch first, otherwise later it can cause issues
# e.g. "RuntimeError: random_device could not be read"
# when running `torch.tensor(value).share_memory_()`
# Same for duckdb which crashes on import
if config.TORCH_AVAILABLE:
import torch # noqa
if config.DUCKDB_AVAILABLE:
import duckdb # noqa
import decord.video_reader
from decord import VideoReader

if not hasattr(VideoReader, "_hf_patched"):
decord.video_reader.bridge_out = lambda x: x
VideoReader._original_init = VideoReader.__init__
VideoReader.__init__ = _patched_init
VideoReader._original_next = VideoReader.next
VideoReader.next = _patched_next
VideoReader._original_get_batch = VideoReader.get_batch
VideoReader.get_batch = _patched_get_batch
VideoReader._hf_patched = True
else:
raise ImportError("To support decoding videos, please install 'decord'.")
# We need to import torch first, otherwise later it can cause issues
# e.g. "RuntimeError: random_device could not be read"
# when running `torch.tensor(value).share_memory_()`
# Same for duckdb which crashes on import
if config.TORCH_AVAILABLE:
import torch # noqa
if config.DUCKDB_AVAILABLE:
import duckdb # noqa
import decord.video_reader
from decord import VideoReader

if not hasattr(VideoReader, "_hf_patched"):
decord.video_reader.bridge_out = lambda x: x
VideoReader._original_init = VideoReader.__init__
VideoReader.__init__ = _patched_init
VideoReader._original_next = VideoReader.next
VideoReader.next = _patched_next
VideoReader._original_get_batch = VideoReader.get_batch
VideoReader.get_batch = _patched_get_batch
VideoReader._hf_patched = True
Loading