From 5b4a3fa50f9a35cf77b9b1703bef9f2c57e1a673 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fran=C3=A7ois=20Freitag?= Date: Tue, 19 Nov 2019 18:05:29 +0100 Subject: [PATCH] Resize images to fit within the page usable area To specify an image size, use the `width` and `height` attributes on the `<img>` element. Rich text editors like TinyMCE use these attributes to control the image size. Though the HTML attributes are unit-less, interpret them as pixels to allow conversion between attribute values and inches, the latter being used as the measurement unit in docx documents. When images go beyond the writable text area, resize them to fit within the page. The usable area was determined by resizing an image with LibreOffice in the default template document. The maximum height for an image in a document is capped at 8.1 inches. Because most images carry no DPI information, the default value of 72 DPI is used. https://python-docx.readthedocs.io/en/latest/api/document.html#docx.document.Document.add_picture Reviewed-by: Jon Dufresne Reviewed-by: Roman Danilov --- html2docx/html2docx.py | 10 +++- html2docx/image.py | 50 ++++++++++++++++ tests/data/img_height.html | 1 + tests/data/img_height.json | 17 ++++++ tests/data/img_max_height.html | 1 + tests/data/img_max_height.json | 17 ++++++ tests/data/img_max_width.html | 1 + tests/data/img_max_width.json | 17 ++++++ tests/data/img_ratio.html | 1 + tests/data/img_ratio.json | 17 ++++++ tests/data/img_width.html | 1 + tests/data/img_width.json | 17 ++++++ tests/test_image_size.py | 101 +++++++++++++++++++++++++++++++++ tox.ini | 4 +- 14 files changed, 252 insertions(+), 3 deletions(-) create mode 100644 tests/data/img_height.html create mode 100644 tests/data/img_height.json create mode 100644 tests/data/img_max_height.html create mode 100644 tests/data/img_max_height.json create mode 100644 tests/data/img_max_width.html create mode 100644 tests/data/img_max_width.json create mode 100644 tests/data/img_ratio.html create mode 100644 
tests/data/img_ratio.json create mode 100644 tests/data/img_width.html create mode 100644 tests/data/img_width.json create mode 100644 tests/test_image_size.py diff --git a/html2docx/html2docx.py b/html2docx/html2docx.py index cc0c47d..35bd97a 100644 --- a/html2docx/html2docx.py +++ b/html2docx/html2docx.py @@ -10,7 +10,7 @@ from tinycss2 import parse_declaration_list from tinycss2.ast import DimensionToken, IdentToken -from .image import load_image +from .image import image_size, load_image WHITESPACE_RE = re.compile(r"\s+") @@ -130,8 +130,14 @@ def add_list_style(self, name: str) -> None: def add_picture(self, attrs: List[Tuple[str, Optional[str]]]) -> None: src = get_attr(attrs, "src") + height_attr = get_attr(attrs, "height") + width_attr = get_attr(attrs, "width") + height_px = int(height_attr) if height_attr else None + width_px = int(width_attr) if width_attr else None + image_buffer = load_image(src) - self.doc.add_picture(image_buffer) + size = image_size(image_buffer, width_px, height_px) + self.doc.add_picture(image_buffer, **size) def handle_starttag(self, tag: str, attrs: List[Tuple[str, Optional[str]]]) -> None: if tag == "a": diff --git a/html2docx/image.py b/html2docx/image.py index f760686..145808d 100644 --- a/html2docx/image.py +++ b/html2docx/image.py @@ -4,9 +4,16 @@ import time import urllib.error import urllib.request +from typing import Dict, Optional from docx.image.exceptions import UnrecognizedImageError from docx.image.image import Image +from docx.shared import Inches + +# The usable size is the space inside the default template margins. +# In LibreOffice, the maximum height for an image is capped to USABLE_HEIGHT. 
+USABLE_HEIGHT = Inches(8.1) +USABLE_WIDTH = Inches(5.8) MAX_IMAGE_SIZE = 10 * 1024 * 1024 # 10 MiB @@ -49,3 +56,46 @@ def load_image(src: str) -> io.BytesIO: image_buffer = io.BytesIO(broken_img_path.read_bytes()) return image_buffer + + +def image_size( + image_buffer: io.BytesIO, + width_px: Optional[int] = None, + height_px: Optional[int] = None, +) -> Dict[str, int]: + """ + Compute width and height to feed python-docx so that image is contained in the page + and respects width_px and height_px. + + Return: + Empty: No resize + Single dimension (width or height): image ratio is expected to be maintained + Two dimensions (width and height): image should be resized to dimensions + """ + image = Image.from_blob(image_buffer.getbuffer()) + + height = image.px_height if height_px is None else height_px + width = image.px_width if width_px is None else width_px + + height = Inches(height / image.vert_dpi) + width = Inches(width / image.horz_dpi) + + size = {} + if width > USABLE_WIDTH: + new_height = round(image.px_height / (image.px_width / USABLE_WIDTH)) + if new_height > USABLE_HEIGHT: + size["height"] = USABLE_HEIGHT + else: + size["width"] = USABLE_WIDTH + elif height > USABLE_HEIGHT: + new_width = round(image.px_width / (image.px_height / USABLE_HEIGHT)) + if new_width > USABLE_WIDTH: + size["width"] = USABLE_WIDTH + else: + size["height"] = USABLE_HEIGHT + else: + if width_px is not None: + size["width"] = width + if height_px is not None: + size["height"] = height + return size diff --git a/tests/data/img_height.html b/tests/data/img_height.html new file mode 100644 index 0000000..8afb7ec --- /dev/null +++ b/tests/data/img_height.html @@ -0,0 +1 @@ + diff --git a/tests/data/img_height.json b/tests/data/img_height.json new file mode 100644 index 0000000..182d0c7 --- /dev/null +++ b/tests/data/img_height.json @@ -0,0 +1,17 @@ +[ + { + "text": "", + "runs": [ + { + "text": "", + "shapes": [ + { + "type": 3, + "width": 2857500, + "height": 2857500 + } + ] + } + 
] + } +] diff --git a/tests/data/img_max_height.html b/tests/data/img_max_height.html new file mode 100644 index 0000000..efa4e1c --- /dev/null +++ b/tests/data/img_max_height.html @@ -0,0 +1 @@ + diff --git a/tests/data/img_max_height.json b/tests/data/img_max_height.json new file mode 100644 index 0000000..d287082 --- /dev/null +++ b/tests/data/img_max_height.json @@ -0,0 +1,17 @@ +[ + { + "text": "", + "runs": [ + { + "text": "", + "shapes": [ + { + "type": 3, + "width": 5303520, + "height": 5303520 + } + ] + } + ] + } +] diff --git a/tests/data/img_max_width.html b/tests/data/img_max_width.html new file mode 100644 index 0000000..8ef80dd --- /dev/null +++ b/tests/data/img_max_width.html @@ -0,0 +1 @@ + diff --git a/tests/data/img_max_width.json b/tests/data/img_max_width.json new file mode 100644 index 0000000..d287082 --- /dev/null +++ b/tests/data/img_max_width.json @@ -0,0 +1,17 @@ +[ + { + "text": "", + "runs": [ + { + "text": "", + "shapes": [ + { + "type": 3, + "width": 5303520, + "height": 5303520 + } + ] + } + ] + } +] diff --git a/tests/data/img_ratio.html b/tests/data/img_ratio.html new file mode 100644 index 0000000..9a60f7b --- /dev/null +++ b/tests/data/img_ratio.html @@ -0,0 +1 @@ + diff --git a/tests/data/img_ratio.json b/tests/data/img_ratio.json new file mode 100644 index 0000000..9f09d34 --- /dev/null +++ b/tests/data/img_ratio.json @@ -0,0 +1,17 @@ +[ + { + "text": "", + "runs": [ + { + "text": "", + "shapes": [ + { + "type": 3, + "width": 190500, + "height": 95250 + } + ] + } + ] + } +] diff --git a/tests/data/img_width.html b/tests/data/img_width.html new file mode 100644 index 0000000..778c2a0 --- /dev/null +++ b/tests/data/img_width.html @@ -0,0 +1 @@ + diff --git a/tests/data/img_width.json b/tests/data/img_width.json new file mode 100644 index 0000000..182d0c7 --- /dev/null +++ b/tests/data/img_width.json @@ -0,0 +1,17 @@ +[ + { + "text": "", + "runs": [ + { + "text": "", + "shapes": [ + { + "type": 3, + "width": 2857500, + "height": 
2857500 + } + ] + } + ] + } +] diff --git a/tests/test_image_size.py b/tests/test_image_size.py new file mode 100644 index 0000000..54c9544 --- /dev/null +++ b/tests/test_image_size.py @@ -0,0 +1,101 @@ +from io import BytesIO +from math import ceil + +from docx.shared import Inches +from PIL import Image + +from html2docx.image import USABLE_HEIGHT, USABLE_WIDTH, image_size + +from .utils import PROJECT_DIR + +broken_image = PROJECT_DIR / "html2docx" / "image-broken.png" +broken_image_bytes = broken_image.read_bytes() +DPI = 72 + + +def inches_to_px(inches: int, dpi: int = DPI) -> int: + return ceil(inches / Inches(1) * dpi) + + +def px_to_inches(px: int, dpi: int = DPI) -> int: + return ceil(px * Inches(1) / dpi) + + +def generate_image(width: int, height: int, dpi=(DPI, DPI)) -> BytesIO: + data = BytesIO() + with Image.new("L", (width, height)) as image: + image.save(data, format="png", dpi=dpi) + return data + + +def test_one_px(): + image = generate_image(width=1, height=1) + size = image_size(image, 1, 1) + side = px_to_inches(1) + assert size == {"width": side, "height": side} + + +def test_upscale(): + image = generate_image(width=1, height=1) + size = image_size(image, width_px=2, height_px=2) + side = px_to_inches(2) + assert size == {"width": side, "height": side} + + +def test_downscale(): + image = generate_image(width=2, height=2) + size = image_size(image, width_px=1, height_px=1) + side = px_to_inches(1) + assert size == {"width": side, "height": side} + + +def test_image_larger_than_usable_width(): + image = generate_image(width=inches_to_px(USABLE_WIDTH) + 1, height=1) + size = image_size(image) + assert size == {"width": USABLE_WIDTH} + + +def test_image_taller_than_usable_height(): + image = generate_image(width=1, height=inches_to_px(USABLE_HEIGHT) + 1) + size = image_size(image) + assert size == {"height": USABLE_HEIGHT} + + +def test_size_larger_than_usable_width(): + image = generate_image(width=100, height=1) + max_width_px = 
inches_to_px(USABLE_WIDTH) + 1 + size = image_size(image, width_px=max_width_px) + assert size == {"width": USABLE_WIDTH} + + +def test_size_taller_than_usable_height(): + image = generate_image(width=1, height=100) + max_height_px = inches_to_px(USABLE_HEIGHT) + 1 + size = image_size(image, height_px=max_height_px) + assert size == {"height": USABLE_HEIGHT} + + +def test_resize_exceeds_width(): + image = generate_image(width=1, height=1) + size = image_size(image, height_px=inches_to_px(USABLE_HEIGHT)) + assert size == {"width": USABLE_WIDTH} + + +def test_resize_exceeds_height(): + image = generate_image(width=1, height=2) + size = image_size(image, width_px=inches_to_px(USABLE_WIDTH)) + assert size == {"height": USABLE_HEIGHT} + + +def test_dpi_width(): + width_px = inches_to_px(USABLE_WIDTH, 300) + image = generate_image(width=width_px, height=1, dpi=(300, 300)) + size = image_size(image) + assert size == {} + + +def test_dpi_height(): + height_px = inches_to_px(USABLE_HEIGHT, 300) + image = generate_image(width=1, height=height_px, dpi=(300, 300)) + size = image_size(image) + assert size == {} diff --git a/tox.ini b/tox.ini index 1c9b595..3d964cd 100644 --- a/tox.ini +++ b/tox.ini @@ -9,7 +9,9 @@ minversion = 1.9 [testenv] commands = pytest {posargs} -deps = pytest +deps = + Pillow + pytest [testenv:black] commands = black --target-version=py36 --check --diff .