def add_picture(self, attrs: List[Tuple[str, Optional[str]]]) -> None:
    """Add the picture described by a tag's attributes to the document.

    Reads the ``src``, ``height`` and ``width`` attributes, loads the image
    and inserts it using the size computed by ``image_size``.

    Args:
        attrs: (name, value) attribute pairs of the tag being handled.
    """

    def parse_px(value: Optional[str]) -> Optional[int]:
        # Markup found in the wild carries values such as "100%", "50px" or
        # "12.5". A bare int() would raise ValueError and abort the whole
        # conversion, so anything that is not a plain non-negative integer
        # is treated as "not specified".
        if not value:
            return None
        try:
            pixels = int(value)
        except ValueError:
            return None
        # A negative extent cannot describe a picture size.
        return pixels if pixels >= 0 else None

    src = get_attr(attrs, "src")
    height_px = parse_px(get_attr(attrs, "height"))
    width_px = parse_px(get_attr(attrs, "width"))

    image_buffer = load_image(src)
    size = image_size(image_buffer, width_px, height_px)
    self.doc.add_picture(image_buffer, **size)
def image_size(
    image_buffer: io.BytesIO,
    width_px: Optional[int] = None,
    height_px: Optional[int] = None,
) -> Dict[str, int]:
    """
    Compute width and height to feed python-docx so that image is contained in the page
    and respects width_px and height_px.

    Args:
        image_buffer: Raw image data; it is read through ``getbuffer()``.
        width_px: Requested width in pixels, or None when not specified.
        height_px: Requested height in pixels, or None when not specified.

    Return:
        Empty: No resize
        Single dimension (width or height): image ratio is expected to be maintained
        Two dimensions (width and height): image should be resized to dimensions
    """
    image = Image.from_blob(image_buffer.getbuffer())

    # Physical size of the stored image in inches, according to its own DPI.
    native_width_in = image.px_width / image.horz_dpi
    native_height_in = image.px_height / image.vert_dpi
    # Height per unit of width. Built from physical sizes rather than raw pixel
    # counts so that images with different horizontal and vertical DPI are
    # handled correctly.
    ratio = native_height_in / native_width_in

    if width_px is None and height_px is None:
        width_in, height_in = native_width_in, native_height_in
    elif height_px is None:
        # Only the width is requested: the height follows the image ratio once
        # rendered, so the page-fit checks must look at that derived height and
        # not at the untouched native one.
        width_in = width_px / image.horz_dpi
        height_in = width_in * ratio
    elif width_px is None:
        # Only the height is requested: same reasoning for the derived width.
        height_in = height_px / image.vert_dpi
        width_in = height_in / ratio
    else:
        width_in = width_px / image.horz_dpi
        height_in = height_px / image.vert_dpi

    width = Inches(width_in)
    height = Inches(height_in)

    size: Dict[str, int] = {}
    if width > USABLE_WIDTH:
        # Pin the width, unless the ratio-preserving height would then overflow,
        # in which case the height is the limiting dimension.
        if round(USABLE_WIDTH * ratio) > USABLE_HEIGHT:
            size["height"] = USABLE_HEIGHT
        else:
            size["width"] = USABLE_WIDTH
    elif height > USABLE_HEIGHT:
        # Symmetric case: pin the height unless the resulting width overflows.
        if round(USABLE_HEIGHT / ratio) > USABLE_WIDTH:
            size["width"] = USABLE_WIDTH
        else:
            size["height"] = USABLE_HEIGHT
    else:
        # Everything fits: hand back exactly the dimensions that were requested.
        if width_px is not None:
            size["width"] = width
        if height_px is not None:
            size["height"] = height
    return size
] + } +] diff --git a/tests/data/img_max_height.html b/tests/data/img_max_height.html new file mode 100644 index 0000000..efa4e1c --- /dev/null +++ b/tests/data/img_max_height.html @@ -0,0 +1 @@ + diff --git a/tests/data/img_max_height.json b/tests/data/img_max_height.json new file mode 100644 index 0000000..d287082 --- /dev/null +++ b/tests/data/img_max_height.json @@ -0,0 +1,17 @@ +[ + { + "text": "", + "runs": [ + { + "text": "", + "shapes": [ + { + "type": 3, + "width": 5303520, + "height": 5303520 + } + ] + } + ] + } +] diff --git a/tests/data/img_max_width.html b/tests/data/img_max_width.html new file mode 100644 index 0000000..8ef80dd --- /dev/null +++ b/tests/data/img_max_width.html @@ -0,0 +1 @@ + diff --git a/tests/data/img_max_width.json b/tests/data/img_max_width.json new file mode 100644 index 0000000..d287082 --- /dev/null +++ b/tests/data/img_max_width.json @@ -0,0 +1,17 @@ +[ + { + "text": "", + "runs": [ + { + "text": "", + "shapes": [ + { + "type": 3, + "width": 5303520, + "height": 5303520 + } + ] + } + ] + } +] diff --git a/tests/data/img_ratio.html b/tests/data/img_ratio.html new file mode 100644 index 0000000..9a60f7b --- /dev/null +++ b/tests/data/img_ratio.html @@ -0,0 +1 @@ + diff --git a/tests/data/img_ratio.json b/tests/data/img_ratio.json new file mode 100644 index 0000000..9f09d34 --- /dev/null +++ b/tests/data/img_ratio.json @@ -0,0 +1,17 @@ +[ + { + "text": "", + "runs": [ + { + "text": "", + "shapes": [ + { + "type": 3, + "width": 190500, + "height": 95250 + } + ] + } + ] + } +] diff --git a/tests/data/img_width.html b/tests/data/img_width.html new file mode 100644 index 0000000..778c2a0 --- /dev/null +++ b/tests/data/img_width.html @@ -0,0 +1 @@ + diff --git a/tests/data/img_width.json b/tests/data/img_width.json new file mode 100644 index 0000000..182d0c7 --- /dev/null +++ b/tests/data/img_width.json @@ -0,0 +1,17 @@ +[ + { + "text": "", + "runs": [ + { + "text": "", + "shapes": [ + { + "type": 3, + "width": 2857500, + "height": 
"""Tests for ``html2docx.image.image_size``."""
from io import BytesIO
from math import ceil

from docx.shared import Inches
from PIL import Image

from html2docx.image import USABLE_HEIGHT, USABLE_WIDTH, image_size

from .utils import PROJECT_DIR

broken_image = PROJECT_DIR / "html2docx" / "image-broken.png"
broken_image_bytes = broken_image.read_bytes()
DPI = 72


def inches_to_px(inches: int, dpi: int = DPI) -> int:
    """Convert a length in the units produced by ``Inches`` to pixels, rounded up."""
    one_inch = Inches(1)
    return ceil(inches / one_inch * dpi)


def px_to_inches(px: int, dpi: int = DPI) -> int:
    """Convert pixels to a length in the units produced by ``Inches``, rounded up."""
    one_inch = Inches(1)
    return ceil(px * one_inch / dpi)


def generate_image(width: int, height: int, dpi=(DPI, DPI)) -> BytesIO:
    """Return a buffer holding a greyscale PNG of the given pixel size and DPI."""
    buffer = BytesIO()
    with Image.new("L", (width, height)) as picture:
        picture.save(buffer, format="png", dpi=dpi)
    return buffer


def test_one_px():
    # Dimensions are passed positionally on purpose: width first, then height.
    expected_side = px_to_inches(1)
    picture = generate_image(width=1, height=1)
    assert image_size(picture, 1, 1) == {
        "width": expected_side,
        "height": expected_side,
    }


def test_upscale():
    expected_side = px_to_inches(2)
    picture = generate_image(width=1, height=1)
    result = image_size(picture, width_px=2, height_px=2)
    assert result == {"width": expected_side, "height": expected_side}


def test_downscale():
    expected_side = px_to_inches(1)
    picture = generate_image(width=2, height=2)
    result = image_size(picture, width_px=1, height_px=1)
    assert result == {"width": expected_side, "height": expected_side}


def test_image_larger_than_usable_width():
    too_wide_px = inches_to_px(USABLE_WIDTH) + 1
    picture = generate_image(width=too_wide_px, height=1)
    assert image_size(picture) == {"width": USABLE_WIDTH}


def test_image_taller_than_usable_height():
    too_tall_px = inches_to_px(USABLE_HEIGHT) + 1
    picture = generate_image(width=1, height=too_tall_px)
    assert image_size(picture) == {"height": USABLE_HEIGHT}


def test_size_larger_than_usable_width():
    picture = generate_image(width=100, height=1)
    requested_width_px = inches_to_px(USABLE_WIDTH) + 1
    result = image_size(picture, width_px=requested_width_px)
    assert result == {"width": USABLE_WIDTH}


def test_size_taller_than_usable_height():
    picture = generate_image(width=1, height=100)
    requested_height_px = inches_to_px(USABLE_HEIGHT) + 1
    result = image_size(picture, height_px=requested_height_px)
    assert result == {"height": USABLE_HEIGHT}


def test_resize_exceeds_width():
    picture = generate_image(width=1, height=1)
    requested_height_px = inches_to_px(USABLE_HEIGHT)
    result = image_size(picture, height_px=requested_height_px)
    assert result == {"width": USABLE_WIDTH}


def test_resize_exceeds_height():
    picture = generate_image(width=1, height=2)
    requested_width_px = inches_to_px(USABLE_WIDTH)
    result = image_size(picture, width_px=requested_width_px)
    assert result == {"height": USABLE_HEIGHT}


def test_dpi_width():
    high_dpi = 300
    picture = generate_image(
        width=inches_to_px(USABLE_WIDTH, high_dpi),
        height=1,
        dpi=(high_dpi, high_dpi),
    )
    assert image_size(picture) == {}


def test_dpi_height():
    high_dpi = 300
    picture = generate_image(
        width=1,
        height=inches_to_px(USABLE_HEIGHT, high_dpi),
        dpi=(high_dpi, high_dpi),
    )
    assert image_size(picture) == {}