diff --git a/html2docx/html2docx.py b/html2docx/html2docx.py
index cc0c47d..35bd97a 100644
--- a/html2docx/html2docx.py
+++ b/html2docx/html2docx.py
@@ -10,7 +10,7 @@
from tinycss2 import parse_declaration_list
from tinycss2.ast import DimensionToken, IdentToken
-from .image import load_image
+from .image import image_size, load_image
WHITESPACE_RE = re.compile(r"\s+")
@@ -130,8 +130,14 @@ def add_list_style(self, name: str) -> None:
def add_picture(self, attrs: List[Tuple[str, Optional[str]]]) -> None:
+        """Insert the image referenced by ``src``, honouring ``width``/``height``.
+
+        ``width`` and ``height`` are read as pixel counts. Values that cannot be
+        read as a non-negative integer (percentages, ``auto``, empty strings, ...)
+        are ignored so that sloppy markup never aborts the conversion; the
+        image then falls back to the size chosen by ``image_size``.
+        """
src = get_attr(attrs, "src")
+
+        def pixels(name: str) -> Optional[int]:
+            """Return attribute *name* as whole pixels, or None if unusable."""
+            raw = (get_attr(attrs, name) or "").strip().lower()
+            # Tolerate the widespread "100px" spelling of a pixel length.
+            if raw.endswith("px"):
+                raw = raw[:-2].rstrip()
+            try:
+                value = int(raw)
+            except ValueError:
+                # "50%", "auto", "" ...: no usable pixel size was requested.
+                return None
+            return value if value >= 0 else None
+
+        height_px = pixels("height")
+        width_px = pixels("width")
+
image_buffer = load_image(src)
- self.doc.add_picture(image_buffer)
+        size = image_size(image_buffer, width_px, height_px)
+        self.doc.add_picture(image_buffer, **size)
def handle_starttag(self, tag: str, attrs: List[Tuple[str, Optional[str]]]) -> None:
if tag == "a":
diff --git a/html2docx/image.py b/html2docx/image.py
index f760686..145808d 100644
--- a/html2docx/image.py
+++ b/html2docx/image.py
@@ -4,9 +4,16 @@
import time
import urllib.error
import urllib.request
+from typing import Dict, Optional
from docx.image.exceptions import UnrecognizedImageError
from docx.image.image import Image
+from docx.shared import Inches
+
+# The usable size is the area inside the margins of the default document template.
+# In LibreOffice, the maximum height for an image is capped at USABLE_HEIGHT.
+USABLE_HEIGHT = Inches(8.1)
+USABLE_WIDTH = Inches(5.8)
MAX_IMAGE_SIZE = 10 * 1024 * 1024 # 10 MiB
@@ -49,3 +56,46 @@ def load_image(src: str) -> io.BytesIO:
image_buffer = io.BytesIO(broken_img_path.read_bytes())
return image_buffer
+
+
+def image_size(
+    image_buffer: io.BytesIO,
+    width_px: Optional[int] = None,
+    height_px: Optional[int] = None,
+) -> Dict[str, int]:
+    """
+    Compute width and height to feed python-docx so that image is contained in the page
+    and respects width_px and height_px.
+
+    When only one of width_px / height_px is given, the other dimension is derived
+    from the image's natural aspect ratio *before* the page check, so that a single
+    attribute can neither overflow the page nor be discarded because the natural
+    image is larger than the page.
+
+    Args:
+        image_buffer: Image data, readable by ``docx.image.image.Image``.
+        width_px: Requested width in pixels, or None when unspecified.
+        height_px: Requested height in pixels, or None when unspecified.
+
+    Return (values are lengths as produced by ``docx.shared.Inches``):
+        Empty: No resize
+        Single dimension (width or height): image ratio is expected to be maintained
+        Two dimensions (width and height): image should be resized to dimensions
+    """
+    image = Image.from_blob(image_buffer.getbuffer())
+
+    # Natural size in inches. Using px / dpi (not raw pixels) keeps the ratio
+    # right for images whose horizontal and vertical dpi differ.
+    natural_width = image.px_width / image.horz_dpi
+    natural_height = image.px_height / image.vert_dpi
+
+    width_in = natural_width if width_px is None else width_px / image.horz_dpi
+    height_in = natural_height if height_px is None else height_px / image.vert_dpi
+
+    # With at most one requested dimension the natural ratio is preserved, so a
+    # single returned dimension is enough to describe the result.
+    keep_ratio = width_px is None or height_px is None
+    if width_px is not None and height_px is None and natural_width:
+        height_in = natural_height * width_in / natural_width
+    elif height_px is not None and width_px is None and natural_height:
+        width_in = natural_width * height_in / natural_height
+
+    width = Inches(width_in)
+    height = Inches(height_in)
+
+    size: Dict[str, int] = {}
+    if width <= USABLE_WIDTH and height <= USABLE_HEIGHT:
+        # Fits in the page: only forward what was explicitly requested.
+        if width_px is not None:
+            size["width"] = width
+        if height_px is not None:
+            size["height"] = height
+        return size
+
+    # Too large: shrink to the usable area. The dimension that overflows the
+    # most (relative to its limit) is pinned to that limit; comparing by
+    # cross-multiplication avoids dividing by a zero dimension.
+    if width_in * USABLE_HEIGHT >= height_in * USABLE_WIDTH:
+        size["width"] = USABLE_WIDTH
+        if not keep_ratio:
+            size["height"] = round(USABLE_WIDTH * height_in / width_in)
+    else:
+        size["height"] = USABLE_HEIGHT
+        if not keep_ratio:
+            size["width"] = round(USABLE_HEIGHT * width_in / height_in)
+    return size
diff --git a/tests/data/img_height.html b/tests/data/img_height.html
new file mode 100644
index 0000000..8afb7ec
--- /dev/null
+++ b/tests/data/img_height.html
@@ -0,0 +1 @@
+
diff --git a/tests/data/img_height.json b/tests/data/img_height.json
new file mode 100644
index 0000000..182d0c7
--- /dev/null
+++ b/tests/data/img_height.json
@@ -0,0 +1,17 @@
+[
+ {
+ "text": "",
+ "runs": [
+ {
+ "text": "",
+ "shapes": [
+ {
+ "type": 3,
+ "width": 2857500,
+ "height": 2857500
+ }
+ ]
+ }
+ ]
+ }
+]
diff --git a/tests/data/img_max_height.html b/tests/data/img_max_height.html
new file mode 100644
index 0000000..efa4e1c
--- /dev/null
+++ b/tests/data/img_max_height.html
@@ -0,0 +1 @@
+
diff --git a/tests/data/img_max_height.json b/tests/data/img_max_height.json
new file mode 100644
index 0000000..d287082
--- /dev/null
+++ b/tests/data/img_max_height.json
@@ -0,0 +1,17 @@
+[
+ {
+ "text": "",
+ "runs": [
+ {
+ "text": "",
+ "shapes": [
+ {
+ "type": 3,
+ "width": 5303520,
+ "height": 5303520
+ }
+ ]
+ }
+ ]
+ }
+]
diff --git a/tests/data/img_max_width.html b/tests/data/img_max_width.html
new file mode 100644
index 0000000..8ef80dd
--- /dev/null
+++ b/tests/data/img_max_width.html
@@ -0,0 +1 @@
+
diff --git a/tests/data/img_max_width.json b/tests/data/img_max_width.json
new file mode 100644
index 0000000..d287082
--- /dev/null
+++ b/tests/data/img_max_width.json
@@ -0,0 +1,17 @@
+[
+ {
+ "text": "",
+ "runs": [
+ {
+ "text": "",
+ "shapes": [
+ {
+ "type": 3,
+ "width": 5303520,
+ "height": 5303520
+ }
+ ]
+ }
+ ]
+ }
+]
diff --git a/tests/data/img_ratio.html b/tests/data/img_ratio.html
new file mode 100644
index 0000000..9a60f7b
--- /dev/null
+++ b/tests/data/img_ratio.html
@@ -0,0 +1 @@
+
diff --git a/tests/data/img_ratio.json b/tests/data/img_ratio.json
new file mode 100644
index 0000000..9f09d34
--- /dev/null
+++ b/tests/data/img_ratio.json
@@ -0,0 +1,17 @@
+[
+ {
+ "text": "",
+ "runs": [
+ {
+ "text": "",
+ "shapes": [
+ {
+ "type": 3,
+ "width": 190500,
+ "height": 95250
+ }
+ ]
+ }
+ ]
+ }
+]
diff --git a/tests/data/img_width.html b/tests/data/img_width.html
new file mode 100644
index 0000000..778c2a0
--- /dev/null
+++ b/tests/data/img_width.html
@@ -0,0 +1 @@
+
diff --git a/tests/data/img_width.json b/tests/data/img_width.json
new file mode 100644
index 0000000..182d0c7
--- /dev/null
+++ b/tests/data/img_width.json
@@ -0,0 +1,17 @@
+[
+ {
+ "text": "",
+ "runs": [
+ {
+ "text": "",
+ "shapes": [
+ {
+ "type": 3,
+ "width": 2857500,
+ "height": 2857500
+ }
+ ]
+ }
+ ]
+ }
+]
diff --git a/tests/test_image_size.py b/tests/test_image_size.py
new file mode 100644
index 0000000..54c9544
--- /dev/null
+++ b/tests/test_image_size.py
@@ -0,0 +1,101 @@
+from io import BytesIO
+from math import ceil
+
+from docx.shared import Inches
+from PIL import Image
+
+from html2docx.image import USABLE_HEIGHT, USABLE_WIDTH, image_size
+
+from .utils import PROJECT_DIR
+
+broken_image = PROJECT_DIR / "html2docx" / "image-broken.png"
+broken_image_bytes = broken_image.read_bytes()
+DPI = 72
+
+
+def inches_to_px(inches: int, dpi: int = DPI) -> int:
+    """Convert a docx length to a whole number of pixels at *dpi*, rounding up.
+
+    Despite its name, *inches* is expressed in the unit returned by
+    ``docx.shared.Inches`` (callers pass USABLE_WIDTH / USABLE_HEIGHT).
+    """
+    one_inch = Inches(1)
+    return ceil(inches / one_inch * dpi)
+
+
+def px_to_inches(px: int, dpi: int = DPI) -> int:
+    """Convert *px* pixels at *dpi* to a docx length, rounding up.
+
+    The result is in the unit returned by ``docx.shared.Inches``.
+    """
+    scaled = px * Inches(1)
+    return ceil(scaled / dpi)
+
+
+def generate_image(width: int, height: int, dpi=(DPI, DPI)) -> BytesIO:
+    """Return an in-memory PNG (mode "L") of *width* x *height* pixels saved at *dpi*."""
+    buffer = BytesIO()
+    blank = Image.new("L", (width, height))
+    with blank:
+        blank.save(buffer, format="png", dpi=dpi)
+    return buffer
+
+
+def test_one_px():
+    """A 1x1 image requested at 1x1 px reports both dimensions as one pixel."""
+    expected = px_to_inches(1)
+    buffer = generate_image(width=1, height=1)
+    assert image_size(buffer, 1, 1) == {"width": expected, "height": expected}
+
+
+def test_upscale():
+    """Requesting 2x2 px for a 1x1 image reports both dimensions as two pixels."""
+    expected = px_to_inches(2)
+    buffer = generate_image(width=1, height=1)
+    result = image_size(buffer, width_px=2, height_px=2)
+    assert result == {"width": expected, "height": expected}
+
+
+def test_downscale():
+    """Requesting 1x1 px for a 2x2 image reports both dimensions as one pixel."""
+    expected = px_to_inches(1)
+    buffer = generate_image(width=2, height=2)
+    result = image_size(buffer, width_px=1, height_px=1)
+    assert result == {"width": expected, "height": expected}
+
+
+def test_image_larger_than_usable_width():
+    """An image one pixel wider than the usable width is pinned to USABLE_WIDTH."""
+    too_wide_px = inches_to_px(USABLE_WIDTH) + 1
+    buffer = generate_image(width=too_wide_px, height=1)
+    assert image_size(buffer) == {"width": USABLE_WIDTH}
+
+
+def test_image_taller_than_usable_height():
+    """An image one pixel taller than the usable height is pinned to USABLE_HEIGHT."""
+    too_tall_px = inches_to_px(USABLE_HEIGHT) + 1
+    buffer = generate_image(width=1, height=too_tall_px)
+    assert image_size(buffer) == {"height": USABLE_HEIGHT}
+
+
+def test_size_larger_than_usable_width():
+    """A requested width beyond the usable width is pinned to USABLE_WIDTH."""
+    requested_px = inches_to_px(USABLE_WIDTH) + 1
+    buffer = generate_image(width=100, height=1)
+    result = image_size(buffer, width_px=requested_px)
+    assert result == {"width": USABLE_WIDTH}
+
+
+def test_size_taller_than_usable_height():
+    """A requested height beyond the usable height is pinned to USABLE_HEIGHT."""
+    requested_px = inches_to_px(USABLE_HEIGHT) + 1
+    buffer = generate_image(width=1, height=100)
+    result = image_size(buffer, height_px=requested_px)
+    assert result == {"height": USABLE_HEIGHT}
+
+
+def test_resize_exceeds_width():
+    """A square image stretched to the usable height ends up limited by width."""
+    requested_px = inches_to_px(USABLE_HEIGHT)
+    buffer = generate_image(width=1, height=1)
+    result = image_size(buffer, height_px=requested_px)
+    assert result == {"width": USABLE_WIDTH}
+
+
+def test_resize_exceeds_height():
+    """A 1x2 image stretched to the usable width ends up limited by height."""
+    requested_px = inches_to_px(USABLE_WIDTH)
+    buffer = generate_image(width=1, height=2)
+    result = image_size(buffer, width_px=requested_px)
+    assert result == {"height": USABLE_HEIGHT}
+
+
+def test_dpi_width():
+    """An image spanning the usable width at 300 dpi is left unresized."""
+    dpi = 300
+    buffer = generate_image(
+        width=inches_to_px(USABLE_WIDTH, dpi), height=1, dpi=(dpi, dpi)
+    )
+    assert image_size(buffer) == {}
+
+
+def test_dpi_height():
+    """An image spanning the usable height at 300 dpi is left unresized."""
+    dpi = 300
+    buffer = generate_image(
+        width=1, height=inches_to_px(USABLE_HEIGHT, dpi), dpi=(dpi, dpi)
+    )
+    assert image_size(buffer) == {}
diff --git a/tox.ini b/tox.ini
index 1c9b595..3d964cd 100644
--- a/tox.ini
+++ b/tox.ini
@@ -9,7 +9,9 @@ minversion = 1.9
[testenv]
commands = pytest {posargs}
-deps = pytest
+deps =
+ Pillow
+ pytest
[testenv:black]
commands = black --target-version=py36 --check --diff .