Skip to content

Commit

Permalink
Don't embed videos (#7259)
Browse files (browse the repository at this point in the history)
don't embed videos
  • Loading branch information
lhoestq authored Oct 28, 2024
1 parent ff0149f commit f75f489
Showing 1 changed file with 1 addition and 36 deletions.
37 changes: 1 addition & 36 deletions src/datasets/features/video.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -10,7 +10,7 @@
from ..download.download_config import DownloadConfig
from ..table import array_cast
from ..utils.file_utils import is_local_path, xopen
from ..utils.py_utils import no_op_if_value_is_null, string_to_dict
from ..utils.py_utils import string_to_dict


if TYPE_CHECKING:
Expand Down Expand Up @@ -236,41 +236,6 @@ def cast_storage(self, storage: Union[pa.StringArray, pa.StructArray, pa.ListArr
)
return array_cast(storage, self.pa_type)

def embed_storage(self, storage: pa.StructArray) -> pa.StructArray:
    """Inline the contents of referenced video files into the Arrow array.

    Rows that already carry `bytes` keep them unchanged. Rows whose `bytes`
    field is null have their `path` opened with `xopen` in binary mode and the
    file content stored as `bytes`. Paths that point to an existing local file
    are shortened to their base name; every other path is kept as-is.

    Args:
        storage (`pa.StructArray`):
            PyArrow array to embed.

    Returns:
        `pa.StructArray`: Array in the Video arrow storage type, that is
            `pa.struct({"bytes": pa.binary(), "path": pa.string()})`.
    """

    @no_op_if_value_is_null
    def read_file(location):
        # Read the whole file in one go; `xopen` is the project's opener.
        with xopen(location, "rb") as handle:
            return handle.read()

    # Build the "bytes" column: prefer bytes already present, else read the path.
    embedded = []
    for row in storage.to_pylist():
        if row is None:
            embedded.append(None)
        elif row["bytes"] is not None:
            embedded.append(row["bytes"])
        else:
            embedded.append(read_file(row["path"]))
    bytes_array = pa.array(embedded, type=pa.binary())

    # Build the "path" column: local files are reduced to their base name.
    names = []
    for location in storage.field("path").to_pylist():
        if location is None:
            names.append(None)
        elif os.path.isfile(location):
            names.append(os.path.basename(location))
        else:
            names.append(location)
    path_array = pa.array(names, type=pa.string())

    # The mask is derived from the bytes column: rows without bytes are masked.
    embedded_storage = pa.StructArray.from_arrays(
        [bytes_array, path_array], ["bytes", "path"], mask=bytes_array.is_null()
    )
    return array_cast(embedded_storage, self.pa_type)


def video_to_bytes(video: "VideoReader") -> bytes:
"""Convert a decord Video object to bytes using native compression if possible"""
Expand Down

0 comments on commit f75f489

Please sign in to comment.