pytorch · NicolasHug · Aug 20, 2024 · Aug 28, 2024 · Aug 28, 2024 · Aug 28, 2024
diff --git a/.github/scripts/setup-env.sh b/.github/scripts/setup-env.sh
@@ -35,6 +35,7 @@ conda create \
 conda activate ci
 conda install --quiet --yes libjpeg-turbo -c pytorch
 pip install --progress-bar=off --upgrade setuptools==72.1.0
+conda install libavif -c pytorch-nightly --yes
 
 # See https://github.com/pytorch/vision/issues/6790
 if [[ "${PYTHON_VERSION}" != "3.11" ]]; then

diff --git a/packaging/pre_build_script.sh b/packaging/pre_build_script.sh
@@ -36,5 +36,7 @@ else
   pip install auditwheel
 fi
 
+conda install libavif -c pytorch-nightly -y
+
 pip install numpy pyyaml future ninja
 pip install --upgrade setuptools==72.1.0
diff --git a/packaging/torchvision/meta.yaml b/packaging/torchvision/meta.yaml
@@ -12,6 +12,7 @@ requirements:
     - libpng
     - libjpeg-turbo
     - libwebp
+    - libavif >= 1.1.1
     - ffmpeg >=4.2.2, <5.0.0  # [linux]
 
   host:
@@ -30,6 +31,7 @@ requirements:
     - ffmpeg >=4.2.2, <5.0.0  # [linux]
     - libjpeg-turbo
     - libwebp
+    - libavif >= 1.1.1
     - pillow >=5.3.0, !=8.3.*
     - pytorch-mutex 1.0 {{ build_variant }}  # [not osx ]
     {{ environ.get('CONDA_PYTORCH_CONSTRAINT', 'pytorch') }}

diff --git a/setup.py b/setup.py
@@ -19,7 +19,7 @@
 USE_PNG = os.getenv("TORCHVISION_USE_PNG", "1") == "1"
 USE_JPEG = os.getenv("TORCHVISION_USE_JPEG", "1") == "1"
 USE_WEBP = os.getenv("TORCHVISION_USE_WEBP", "1") == "1"
-USE_AVIF = os.getenv("TORCHVISION_USE_AVIF", "0") == "1"  # TODO enable by default!
+USE_AVIF = os.getenv("TORCHVISION_USE_AVIF", "1") == "1"
 USE_NVJPEG = os.getenv("TORCHVISION_USE_NVJPEG", "1") == "1"
 NVCC_FLAGS = os.getenv("NVCC_FLAGS", None)
 # Note: the GPU video decoding stuff used to be called "video codec", which

diff --git a/test/smoke_test.py b/test/smoke_test.py
@@ -30,6 +30,9 @@ def smoke_test_torchvision_read_decode() -> None:
     img_webp = read_image(str(SCRIPT_DIR / "assets/fakedata/logos/rgb_pytorch.webp"))
     if img_webp.shape != (3, 100, 100):
         raise RuntimeError(f"Unexpected shape of img_webp: {img_webp.shape}")
+    img_avif = read_image(str(SCRIPT_DIR / "assets/fakedata/logos/rgb_pytorch.avif"))
+    if img_avif.shape != (3, 100, 100):
+        raise RuntimeError(f"Unexpected shape of img_avif: {img_avif.shape}")
 
 
 def smoke_test_torchvision_decode_jpeg(device: str = "cpu"):

diff --git a/test/test_image.py b/test/test_image.py
@@ -14,7 +14,7 @@
 from common_utils import assert_equal, cpu_and_cuda, IN_OSS_CI, needs_cuda
 from PIL import __version__ as PILLOW_VERSION, Image, ImageOps, ImageSequence
 from torchvision.io.image import (
-    _decode_avif,
+    decode_avif,
     decode_gif,
     decode_image,
     decode_jpeg,
@@ -863,7 +863,7 @@ def test_decode_gif(tmpdir, name, scripted):
             torch.testing.assert_close(tv_frame, pil_frame, atol=0, rtol=0)
 
 
-@pytest.mark.parametrize("decode_fun", (decode_gif, decode_webp))
+@pytest.mark.parametrize("decode_fun", (decode_gif, decode_webp, decode_avif))
 def test_decode_gif_webp_errors(decode_fun):
     encoded_data = torch.randint(0, 256, (100,), dtype=torch.uint8)
     with pytest.raises(RuntimeError, match="Input tensor must be 1-dimensional"):
@@ -876,6 +876,8 @@ def test_decode_gif_webp_errors(decode_fun):
         expected_match = re.escape("DGifOpenFileName() failed - 103")
     elif decode_fun is decode_webp:
         expected_match = "WebPGetFeatures failed."
+    else:
+        expected_match = "avifDecoderParse failed: BMFF parsing failed"
     with pytest.raises(RuntimeError, match=expected_match):
         decode_fun(encoded_data)
 
@@ -916,8 +918,7 @@ def test_decode_webp_against_pil(decode_fun, scripted, mode, pil_mode, filename)
     assert_equal(img, from_pil)
 
 
-@pytest.mark.xfail(reason="AVIF support not enabled yet.")
-@pytest.mark.parametrize("decode_fun", (_decode_avif, decode_image))
+@pytest.mark.parametrize("decode_fun", (decode_avif, decode_image))
 @pytest.mark.parametrize("scripted", (False, True))
 def test_decode_avif(decode_fun, scripted):
     encoded_bytes = read_file(next(get_images(FAKEDATA_DIR, ".avif")))
@@ -928,11 +929,12 @@ def test_decode_avif(decode_fun, scripted):
     assert img[None].is_contiguous(memory_format=torch.channels_last)
 
 
-@pytest.mark.xfail(reason="AVIF support not enabled yet.")
+# Run on avif files from https://github.com/AOMediaCodec/libavif/tree/main/tests/data
+@pytest.mark.skip(reason="Need to download test images first")
 # Note: decode_image fails because some of these files have a (valid) signature
 # we don't recognize. We should probably use libmagic....
-# @pytest.mark.parametrize("decode_fun", (_decode_avif, decode_image))
+# @pytest.mark.parametrize("decode_fun", (decode_avif, decode_image))
-@pytest.mark.parametrize("decode_fun", (_decode_avif,))
+@pytest.mark.parametrize("decode_fun", (decode_avif,))
 @pytest.mark.parametrize("scripted", (False, True))
 @pytest.mark.parametrize(
     "mode, pil_mode",

diff --git a/torchvision/io/__init__.py b/torchvision/io/__init__.py
@@ -22,6 +22,7 @@
     VideoMetaData,
 )
 from .image import (
+    decode_avif,
     decode_gif,
     decode_image,
     decode_jpeg,
@@ -63,6 +64,7 @@
     "decode_png",
     "decode_webp",
     "decode_gif",
+    "decode_avif",
     "encode_jpeg",
     "encode_png",
     "read_file",

diff --git a/torchvision/io/image.py b/torchvision/io/image.py
@@ -392,7 +392,7 @@ def decode_webp(
     return torch.ops.image.decode_webp(input, mode.value)
 
 
-def _decode_avif(
+def decode_avif(
     input: torch.Tensor,
     mode: ImageReadMode = ImageReadMode.UNCHANGED,
 ) -> torch.Tensor: