Handle GIFs correct in gr.Image preprocessing (#8589)

* handle gifs correct in image preprocessing * add changeset * fix * add test * add test * docstring * add docs * image * revert * change * add changeset --------- Co-authored-by: gradio-pr-bot <[email protected]>
gradio-app · Jun 20, 2024 · 34430b9 · 34430b9
1 parent 797621b
commit 34430b9
Show file tree

Hide file tree

Showing 6 changed files with 61 additions and 47 deletions.
diff --git a/.changeset/fluffy-crabs-sleep.md b/.changeset/fluffy-crabs-sleep.md
@@ -0,0 +1,6 @@
+---
+"gradio": patch
+"website": patch
+---
+
+fix:Handle GIFs correct in `gr.Image` preprocessing
diff --git a/gradio/components/image.py b/gradio/components/image.py
@@ -71,12 +71,12 @@ def __init__(
         """
         Parameters:
             value: A PIL Image, numpy array, path or URL for the default value that Image component is going to take. If callable, the function will be called whenever the app loads to set the initial value of the component.
-            format: Format to save image if it does not already have a valid format (e.g. if the image is being returned to the frontend as a numpy array or PIL Image).  The format should be supported by the PIL library. This parameter has no effect on SVG files.
+            format: File format (e.g. "png" or "gif") to save image if it does not already have a valid format (e.g. if the image is being returned to the frontend as a numpy array or PIL Image).  The format should be supported by the PIL library. This parameter has no effect on SVG files.
             height: The height of the displayed image, specified in pixels if a number is passed, or in CSS units if a string is passed.
             width: The width of the displayed image, specified in pixels if a number is passed, or in CSS units if a string is passed.
-            image_mode: "RGB" if color, or "L" if black and white. See https://pillow.readthedocs.io/en/stable/handbook/concepts.html for other supported image modes and their meaning.
+            image_mode: "RGB" if color, or "L" if black and white. See https://pillow.readthedocs.io/en/stable/handbook/concepts.html for other supported image modes and their meaning. This parameter has no effect on SVG or GIF files.
             sources: List of sources for the image. "upload" creates a box where user can drop an image file, "webcam" allows user to take snapshot from their webcam, "clipboard" allows users to paste an image from the clipboard. If None, defaults to ["upload", "webcam", "clipboard"] if streaming is False, otherwise defaults to ["webcam"].
-            type: The format the image is converted before being passed into the prediction function. "numpy" converts the image to a numpy array with shape (height, width, 3) and values from 0 to 255, "pil" converts the image to a PIL image object, "filepath" passes a str path to a temporary file containing the image. If the image is SVG, the `type` is ignored and the filepath of the SVG is returned.
+            type: The format the image is converted before being passed into the prediction function. "numpy" converts the image to a numpy array with shape (height, width, 3) and values from 0 to 255, "pil" converts the image to a PIL image object, "filepath" passes a str path to a temporary file containing the image. If the image is SVG, the `type` is ignored and the filepath of the SVG is returned. To support animated GIFs in input, the `type` should be set to "filepath" or "pil".
             label: The label for this component. Appears above the component and is also used as the header if there are a table of examples for this component. If None and used in a `gr.Interface`, the label will be the name of the parameter this component is assigned to.
             every: If `value` is a callable, run the function 'every' number of seconds while the client connection is open. Has no effect otherwise. The event can be accessed (e.g. to cancel it) via this component's .load_event attribute.
             show_label: if True, will display label.
@@ -181,9 +181,10 @@ def preprocess(
                 warnings.warn(
                     f"Failed to transpose image {file_path} based on EXIF data."
                 )
-        with warnings.catch_warnings():
-            warnings.simplefilter("ignore")
-            im = im.convert(self.image_mode)
+        if suffix.lower() != "gif" and im is not None:
+            with warnings.catch_warnings():
+                warnings.simplefilter("ignore")
+                im = im.convert(self.image_mode)
         return image_utils.format_image(
             im,
             cast(Literal["numpy", "pil", "filepath"], self.type),

diff --git a/gradio/processing_utils.py b/gradio/processing_utils.py
@@ -17,7 +17,7 @@
 import httpx
 import numpy as np
 from gradio_client import utils as client_utils
-from PIL import Image, ImageOps, PngImagePlugin
+from PIL import Image, ImageOps, ImageSequence, PngImagePlugin
 
 from gradio import utils, wasm_utils
 from gradio.data_classes import FileData, GradioModel, GradioRootModel, JsonData
@@ -138,7 +138,7 @@ def encode_plot_to_base64(plt, format: str = "png"):
         plt.savefig(output_bytes, format=fmt)
         bytes_data = output_bytes.getvalue()
     base64_str = str(base64.b64encode(bytes_data), "utf-8")
-    return output_base64(base64_str, fmt)
+    return f"data:image/{format or 'png'};base64,{base64_str}"
 
 
 def get_pil_exif_bytes(pil_image):
@@ -158,34 +158,25 @@ def get_pil_metadata(pil_image):
 
 def encode_pil_to_bytes(pil_image, format="png"):
     with BytesIO() as output_bytes:
-        if format == "png":
-            params = {"pnginfo": get_pil_metadata(pil_image)}
+        if format.lower() == "gif":
+            frames = [frame.copy() for frame in ImageSequence.Iterator(pil_image)]
+            frames[0].save(
+                output_bytes,
+                format=format,
+                save_all=True,
+                append_images=frames[1:],
+                loop=0,
+            )
         else:
-            exif = get_pil_exif_bytes(pil_image)
-            params = {"exif": exif} if exif else {}
-        pil_image.save(output_bytes, format, **params)
+            if format.lower() == "png":
+                params = {"pnginfo": get_pil_metadata(pil_image)}
+            else:
+                exif = get_pil_exif_bytes(pil_image)
+                params = {"exif": exif} if exif else {}
+            pil_image.save(output_bytes, format, **params)
         return output_bytes.getvalue()
 
 
-def encode_pil_to_base64(pil_image, format="png"):
-    bytes_data = encode_pil_to_bytes(pil_image, format)
-    base64_str = str(base64.b64encode(bytes_data), "utf-8")
-    return output_base64(base64_str, format)
-
-
-def encode_array_to_base64(image_array, format="png"):
-    with BytesIO() as output_bytes:
-        pil_image = Image.fromarray(_convert(image_array, np.uint8, force_copy=False))
-        pil_image.save(output_bytes, format)
-        bytes_data = output_bytes.getvalue()
-    base64_str = str(base64.b64encode(bytes_data), "utf-8")
-    return output_base64(base64_str, format)
-
-
-def output_base64(data, format=None) -> str:
-    return f"data:image/{format or 'png'};base64,{data}"
-
-
 def hash_file(file_path: str | Path, chunk_num_blocks: int = 128) -> str:
     sha1 = hashlib.sha1()
     with open(file_path, "rb") as f:

diff --git a/gradio/test_data/rectangles.gif b/gradio/test_data/rectangles.gif
diff --git a/js/_website/src/lib/templates/gradio/03_components/image.svx b/js/_website/src/lib/templates/gradio/03_components/image.svx
@@ -69,6 +69,29 @@ def predict(···) -> np.ndarray | PIL.Image.Image | str | Path | None
 <ShortcutTable shortcuts={obj.string_shortcuts} />
 {/if}
 
+
+### `GIF` and `SVG` Image Formats
+
+The `gr.Image` component can process or display any image format that is [supported by the PIL library](https://pillow.readthedocs.io/en/stable/handbook/image-file-formats.html), including animated GIFs. In addition, it also supports the SVG image format. 
+
+When the `gr.Image` component is used as an input component, the image is converted into a `str` filepath, a `PIL.Image` object, or a `numpy.array`, depending on the `type` parameter. However, animated GIF and SVG images are treated differently:
+
+* Animated `GIF` images can only be converted to `str` filepaths or `PIL.Image` objects. If they are converted to a `numpy.array` (which is the default behavior), only the first frame will be used. So if your demo expects an input `GIF` image, make sure to set the `type` parameter accordingly, e.g.
+
+```py
+import gradio as gr
+
+demo = gr.Interface(
+    fn=lambda x:x, 
+    inputs=gr.Image(type="filepath"), 
+    outputs=gr.Image()
+)
+
+demo.launch()
+```
+
+* For `SVG` images, the `type` parameter is ignored altogether and the image is always returned as an image filepath. This is because `SVG` images cannot be processed as `PIL.Image` or `numpy.array` objects.
+
 {#if obj.demos && obj.demos.length > 0}
 <!--- Demos -->
 ### Demos 

diff --git a/test/test_processing_utils.py b/test/test_processing_utils.py
@@ -1,7 +1,6 @@
 import os
 import shutil
 import tempfile
-from copy import deepcopy
 from pathlib import Path
 from unittest.mock import patch
 
@@ -114,20 +113,6 @@ def test_encode_plot_to_base64(self):
             "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAo"
         )
 
-    def test_encode_array_to_base64(self):
-        img = Image.open("gradio/test_data/test_image.png")
-        img = img.convert("RGB")
-        numpy_data = np.asarray(img, dtype=np.uint8)
-        output_base64 = processing_utils.encode_array_to_base64(numpy_data)
-        assert output_base64 == deepcopy(media_data.ARRAY_TO_BASE64_IMAGE)
-
-    def test_encode_pil_to_base64(self):
-        img = Image.open("gradio/test_data/test_image.png")
-        img = img.convert("RGB")
-        img.info = {}  # Strip metadata
-        output_base64 = processing_utils.encode_pil_to_base64(img)
-        assert output_base64 == deepcopy(media_data.ARRAY_TO_BASE64_IMAGE)
-
     def test_save_pil_to_file_keeps_pnginfo(self, gradio_temp_dir):
         input_img = Image.open("gradio/test_data/test_image.png")
         input_img = input_img.convert("RGB")
@@ -141,6 +126,14 @@ def test_save_pil_to_file_keeps_pnginfo(self, gradio_temp_dir):
 
         assert output_img.info == input_img.info
 
+    def test_save_pil_to_file_keeps_all_gif_frames(self, gradio_temp_dir):
+        input_img = Image.open("gradio/test_data/rectangles.gif")
+        file_obj = processing_utils.save_pil_to_cache(
+            input_img, cache_dir=gradio_temp_dir, format="gif"
+        )
+        output_img = Image.open(file_obj)
+        assert output_img.n_frames == input_img.n_frames == 3
+
     def test_np_pil_encode_to_the_same(self, gradio_temp_dir):
         arr = np.random.randint(0, 255, size=(100, 100, 3), dtype=np.uint8)
         pil = Image.fromarray(arr)