Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

AVIF decoder part 2 #8603

Open
wants to merge 5 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .github/scripts/setup-env.sh
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ conda create \
conda activate ci
conda install --quiet --yes libjpeg-turbo -c pytorch
pip install --progress-bar=off --upgrade setuptools==72.1.0
conda install libavif -c pytorch-nightly --yes

# See https://github.com/pytorch/vision/issues/6790
if [[ "${PYTHON_VERSION}" != "3.11" ]]; then
Expand Down
2 changes: 2 additions & 0 deletions packaging/pre_build_script.sh
Original file line number Diff line number Diff line change
Expand Up @@ -36,5 +36,7 @@ else
pip install auditwheel
fi

conda install libavif -c pytorch-nightly -y

pip install numpy pyyaml future ninja
pip install --upgrade setuptools==72.1.0
2 changes: 2 additions & 0 deletions packaging/torchvision/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ requirements:
- libpng
- libjpeg-turbo
- libwebp
- libavif >= 1.1.1
- ffmpeg >=4.2.2, <5.0.0 # [linux]

host:
Expand All @@ -30,6 +31,7 @@ requirements:
- ffmpeg >=4.2.2, <5.0.0 # [linux]
- libjpeg-turbo
- libwebp
- libavif >= 1.1.1
- pillow >=5.3.0, !=8.3.*
- pytorch-mutex 1.0 {{ build_variant }} # [not osx ]
{{ environ.get('CONDA_PYTORCH_CONSTRAINT', 'pytorch') }}
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
USE_PNG = os.getenv("TORCHVISION_USE_PNG", "1") == "1"
USE_JPEG = os.getenv("TORCHVISION_USE_JPEG", "1") == "1"
USE_WEBP = os.getenv("TORCHVISION_USE_WEBP", "1") == "1"
USE_AVIF = os.getenv("TORCHVISION_USE_AVIF", "0") == "1" # TODO enable by default!
USE_AVIF = os.getenv("TORCHVISION_USE_AVIF", "1") == "1"
USE_NVJPEG = os.getenv("TORCHVISION_USE_NVJPEG", "1") == "1"
NVCC_FLAGS = os.getenv("NVCC_FLAGS", None)
# Note: the GPU video decoding stuff used to be called "video codec", which
Expand Down
3 changes: 3 additions & 0 deletions test/smoke_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,9 @@ def smoke_test_torchvision_read_decode() -> None:
img_webp = read_image(str(SCRIPT_DIR / "assets/fakedata/logos/rgb_pytorch.webp"))
if img_webp.shape != (3, 100, 100):
raise RuntimeError(f"Unexpected shape of img_webp: {img_webp.shape}")
img_avif = read_image(str(SCRIPT_DIR / "assets/fakedata/logos/rgb_pytorch.avif"))
if img_avif.shape != (3, 100, 100):
raise RuntimeError(f"Unexpected shape of img_avif: {img_avif.shape}")


def smoke_test_torchvision_decode_jpeg(device: str = "cpu"):
Expand Down
14 changes: 8 additions & 6 deletions test/test_image.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
from common_utils import assert_equal, cpu_and_cuda, IN_OSS_CI, needs_cuda
from PIL import __version__ as PILLOW_VERSION, Image, ImageOps, ImageSequence
from torchvision.io.image import (
_decode_avif,
decode_avif,
decode_gif,
decode_image,
decode_jpeg,
Expand Down Expand Up @@ -863,7 +863,7 @@ def test_decode_gif(tmpdir, name, scripted):
torch.testing.assert_close(tv_frame, pil_frame, atol=0, rtol=0)


@pytest.mark.parametrize("decode_fun", (decode_gif, decode_webp))
@pytest.mark.parametrize("decode_fun", (decode_gif, decode_webp, decode_avif))
def test_decode_gif_webp_errors(decode_fun):
encoded_data = torch.randint(0, 256, (100,), dtype=torch.uint8)
with pytest.raises(RuntimeError, match="Input tensor must be 1-dimensional"):
Expand All @@ -876,6 +876,8 @@ def test_decode_gif_webp_errors(decode_fun):
expected_match = re.escape("DGifOpenFileName() failed - 103")
elif decode_fun is decode_webp:
expected_match = "WebPGetFeatures failed."
else:
expected_match = "avifDecoderParse failed: BMFF parsing failed"
with pytest.raises(RuntimeError, match=expected_match):
decode_fun(encoded_data)

Expand Down Expand Up @@ -916,8 +918,7 @@ def test_decode_webp_against_pil(decode_fun, scripted, mode, pil_mode, filename)
assert_equal(img, from_pil)


@pytest.mark.xfail(reason="AVIF support not enabled yet.")
@pytest.mark.parametrize("decode_fun", (_decode_avif, decode_image))
@pytest.mark.parametrize("decode_fun", (decode_avif, decode_image))
@pytest.mark.parametrize("scripted", (False, True))
def test_decode_avif(decode_fun, scripted):
encoded_bytes = read_file(next(get_images(FAKEDATA_DIR, ".avif")))
Expand All @@ -928,11 +929,12 @@ def test_decode_avif(decode_fun, scripted):
assert img[None].is_contiguous(memory_format=torch.channels_last)


@pytest.mark.xfail(reason="AVIF support not enabled yet.")
# Run on avif files from https://github.com/AOMediaCodec/libavif/tree/main/tests/data
@pytest.mark.skip(reason="Need to download test images first")
# Note: decode_image fails because some of these files have a (valid) signature
# that we don't recognize. We should probably use libmagic to detect the file type.
# @pytest.mark.parametrize("decode_fun", (_decode_avif, decode_image))
@pytest.mark.parametrize("decode_fun", (_decode_avif,))
@pytest.mark.parametrize("decode_fun", (decode_avif,))
@pytest.mark.parametrize("scripted", (False, True))
@pytest.mark.parametrize(
"mode, pil_mode",
Expand Down
2 changes: 2 additions & 0 deletions torchvision/io/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
VideoMetaData,
)
from .image import (
decode_avif,
decode_gif,
decode_image,
decode_jpeg,
Expand Down Expand Up @@ -63,6 +64,7 @@
"decode_png",
"decode_webp",
"decode_gif",
"decode_avif",
"encode_jpeg",
"encode_png",
"read_file",
Expand Down
2 changes: 1 addition & 1 deletion torchvision/io/image.py
Original file line number Diff line number Diff line change
Expand Up @@ -392,7 +392,7 @@ def decode_webp(
return torch.ops.image.decode_webp(input, mode.value)


def _decode_avif(
def decode_avif(
input: torch.Tensor,
mode: ImageReadMode = ImageReadMode.UNCHANGED,
) -> torch.Tensor:
Expand Down
Loading