-
Notifications
You must be signed in to change notification settings - Fork 14
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Basic support for inline images in documents
Broken image from https://freesvg.org/broken-icon, licensed under CC0 (Creative commons, “No Rights Reserved”). Reviewed-by: Jon Dufresne Reviewed-by: Roman Danilov
- Loading branch information
1 parent
a9ef888
commit 0f26c0d
Showing
13 changed files
with
256 additions
and
4 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,6 @@ | ||
include LICENSE | ||
include README.md | ||
include tox.ini | ||
include html2docx/image-broken.png | ||
include html2docx/py.typed | ||
recursive-include tests *.html *.json *.py |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,51 @@ | ||
import http | ||
import io | ||
import pathlib | ||
import time | ||
import urllib.error | ||
import urllib.request | ||
|
||
from docx.image.exceptions import UnrecognizedImageError | ||
from docx.image.image import Image | ||
|
||
# Upper bound, in bytes, on the amount of data load_image() accepts for a
# single downloaded image.
MAX_IMAGE_SIZE = 10 * 1024 * 1024  # 10 MiB
|
||
|
||
def load_image(src: str) -> io.BytesIO:
    """Download the image at ``src`` and return its bytes in a buffer.

    The download is retried (up to three attempts, one second apart) only
    when ``urlopen`` raises ``URLError``. An invalid URL, an HTTP error
    status, a malformed HTTP response or a body larger than
    ``MAX_IMAGE_SIZE`` ends the attempts immediately. Downloaded data that
    ``docx`` does not recognise as an image is discarded.

    Whenever no usable image was obtained, the content of the
    ``image-broken.png`` file located next to this module is returned
    instead.

    ``src`` is the URL of the image. The return value is a ``BytesIO``
    holding either the downloaded image or the placeholder image.
    """
    image_buffer = None
    retry = 3
    while retry and not image_buffer:
        try:
            with urllib.request.urlopen(src) as response:
                size = response.getheader("Content-Length")
                if size and int(size) > MAX_IMAGE_SIZE:
                    # The server announced an oversized body: do not read it.
                    break
                # Read up to MAX_IMAGE_SIZE when response does not contain
                # the Content-Length header. The extra byte avoids an extra
                # read to check whether the EOF was reached.
                data = response.read(MAX_IMAGE_SIZE + 1)
        except (ValueError, http.client.HTTPException, urllib.error.HTTPError):
            # ValueError: Invalid URL or non-integer Content-Length.
            # HTTPException: Server does not speak HTTP properly.
            # HTTPError: Server could not perform request.
            # HTTPError is a subclass of URLError, so this clause must come
            # before the URLError one below.
            retry = 0
        except urllib.error.URLError:
            # URLError: Transient network error, e.g. DNS request failed.
            retry -= 1
            if retry:
                time.sleep(1)
        else:
            if len(data) > MAX_IMAGE_SIZE:
                # More data than allowed arrived without being announced in
                # Content-Length. Requesting it again would produce the same
                # oversized body, and leaving the loop state untouched would
                # repeat the download forever, so give up right away.
                break
            image_buffer = io.BytesIO(data)

    if image_buffer:
        # Reject payloads that are not an image format docx can identify.
        try:
            Image.from_blob(image_buffer.getbuffer())
        except UnrecognizedImageError:
            image_buffer = None

    if not image_buffer:
        # Fall back to the placeholder shipped alongside this module.
        broken_img_path = pathlib.Path(__file__).parent / "image-broken.png"
        image_buffer = io.BytesIO(broken_img_path.read_bytes())

    return image_buffer
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,103 @@ | ||
import http.server | ||
import os | ||
import sys | ||
import threading | ||
|
||
import pytest | ||
|
||
from .utils import TEST_DIR | ||
|
||
|
||
class CountingHTTPServer(http.server.HTTPServer):
    """HTTP server that keeps a tally of the requests it has finished."""

    # Class-level starting value; the first increment stores the running
    # total on the instance, so separate servers do not share a counter.
    request_count = 0

    def finish_request(self, *args, **kwargs):
        """Increment ``request_count`` and delegate to the parent class."""
        self.request_count = self.request_count + 1
        return super().finish_request(*args, **kwargs)
|
||
|
||
class HttpServerThread(threading.Thread):
    """Thread that runs a ``CountingHTTPServer`` bound to localhost.

    ``is_ready`` is set once the server is about to serve requests or once
    start-up failed; in the latter case the exception is stored in ``error``.
    ``httpd`` and ``base_url`` only exist after the server was created.
    """

    def __init__(self, handler):
        super().__init__()
        self.handler = handler
        self.error = None
        self.is_ready = threading.Event()

    def run(self):
        """Create the server, publish its URL and serve until shut down."""
        try:
            # Port 0 is requested, so the real port is read back from the
            # server's bound address.
            server = CountingHTTPServer(("localhost", 0), self.handler)
            self.httpd = server
            self.base_url = f"http://localhost:{server.server_address[1]}/"
            self.is_ready.set()
            server.serve_forever(poll_interval=0.01)
        except Exception as exc:
            self.error = exc
            # Wake up anyone waiting for readiness so they can see the error.
            self.is_ready.set()

    def terminate(self):
        """Stop the server, if one was created, and wait for the thread."""
        if hasattr(self, "httpd"):
            server = self.httpd
            server.shutdown()
            server.server_close()
        self.join()
|
||
|
||
class ImageHandler(http.server.SimpleHTTPRequestHandler):
    """Request handler that serves the files under ``TEST_DIR / "images"``."""

    def __init__(self, *args, directory=None, **kwargs):
        # A ``directory`` argument supplied by the caller is accepted but not
        # forwarded; the images directory is always used instead.
        images_dir = TEST_DIR / "images"
        if sys.version_info >= (3, 9):
            kwargs["directory"] = images_dir
        elif sys.version_info >= (3, 7):
            # NOTE(review): presumably 3.7/3.8 need a plain string path here
            # - confirm against the http.server documentation.
            kwargs["directory"] = os.fspath(images_dir)
        super().__init__(*args, **kwargs)

    def translate_path(self, path):
        """Map a request path to a filesystem path in the images directory."""
        if sys.version_info >= (3, 7):
            return super().translate_path(path)

        # Before 3.7 no ``directory`` is passed to the parent, so temporarily
        # switch the working directory while the parent resolves the path.
        previous_cwd = os.getcwd()
        try:
            os.chdir(TEST_DIR / "images")
            return super().translate_path(path)
        finally:
            os.chdir(previous_cwd)
|
||
|
||
def http_server_thread(handler):
    """Run an HTTP server in a background thread around a ``yield``.

    Generator intended to be driven with ``yield from`` by a pytest fixture.
    It starts an ``HttpServerThread`` using ``handler`` as the request
    handler class, waits until the thread signals readiness, yields the
    thread, and finally terminates it.

    Raises the exception recorded by the server thread, either before
    yielding (the server could not be started) or after the consumer has
    finished (the server failed while serving).
    """
    server_thread = HttpServerThread(handler)
    server_thread.daemon = True
    server_thread.start()
    server_thread.is_ready.wait()
    try:
        # Surface start-up failures immediately instead of handing out a
        # thread that has no ``base_url``/``httpd`` yet.
        if server_thread.error:
            raise server_thread.error
        yield server_thread
        # Report errors the server thread hit while the consumer was running.
        if server_thread.error:
            raise server_thread.error
    finally:
        # The yield is inside the try so that the server is also shut down
        # when the generator is closed or an exception is thrown into it.
        server_thread.terminate()
|
||
|
||
@pytest.fixture(scope="function")
def image_server():
    """
    Start an HTTP server serving test images.

    Yields the server thread produced by ``http_server_thread`` when it is
    given ``ImageHandler``; start-up and shutdown are delegated to that
    generator.
    """
    yield from http_server_thread(ImageHandler)
|
||
|
||
class BadContentHandler(http.server.SimpleHTTPRequestHandler):
    """Handler whose GET sends no status line, headers or body."""

    def do_GET(self):
        # Only flag the connection for closing; nothing is written back.
        # NOTE(review): presumably the client then sees a malformed/empty
        # HTTP response - confirm against the test using this handler.
        self.close_connection = True
|
||
|
||
@pytest.fixture(scope="function")
def bad_server():
    """
    Start an HTTP server that uses ``BadContentHandler``.

    Yields the server thread produced by ``http_server_thread``.
    """
    yield from http_server_thread(BadContentHandler)
|
||
|
||
class BadContentLengthHandler(http.server.SimpleHTTPRequestHandler):
    """Handler answering GET with a non-numeric Content-Length header."""

    def do_GET(self):
        # Send a 200 response whose Content-Length value is the literal text
        # "invalid"; no body is written after the headers.
        self.send_response(http.HTTPStatus.OK)
        self.send_header("Content-Length", "invalid")
        self.end_headers()
|
||
|
||
@pytest.fixture(scope="function")
def bad_content_length_server():
    """
    Start an HTTP server that uses ``BadContentLengthHandler``.

    Yields the server thread produced by ``http_server_thread``.
    """
    yield from http_server_thread(BadContentLengthHandler)
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
<img src="https://via.placeholder.com/1x1.png"> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
[ | ||
{ | ||
"text": "", | ||
"runs": [ | ||
{ | ||
"text": "", | ||
"shapes": [ | ||
{ | ||
"type": 3, | ||
"width": 9525, | ||
"height": 9525 | ||
} | ||
] | ||
} | ||
] | ||
} | ||
] |
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,60 @@ | ||
import urllib.error | ||
import urllib.request | ||
from unittest import mock | ||
|
||
from html2docx.image import load_image | ||
|
||
from .utils import PROJECT_DIR, TEST_DIR | ||
|
||
# Path and content of the image-broken.png file inside the html2docx
# package directory; the tests below compare load_image() results with
# these bytes.
broken_image = PROJECT_DIR / "html2docx" / "image-broken.png"
broken_image_bytes = broken_image.read_bytes()
|
||
|
||
def test_basic(image_server):
    """The downloaded 1x1.png equals the reference copy in the data dir."""
    url = image_server.base_url + "1x1.png"
    downloaded = load_image(url)
    reference_path = TEST_DIR / "data" / "1x1.png"
    assert downloaded.getbuffer() == reference_path.read_bytes()
|
||
|
||
def test_non_image(image_server):
    """Requesting the server root, not an image file, gives the placeholder."""
    root_url = image_server.base_url
    result = load_image(root_url)
    assert result.getbuffer() == broken_image_bytes
|
||
|
||
def test_bad_url():
    """The source string "bad" results in the placeholder image."""
    result = load_image("bad")
    assert result.getbuffer() == broken_image_bytes
|
||
|
||
def test_transient_network_error_retries():
    """A URLError on every attempt: three requests, two sleeps, placeholder."""
    url = "https://transient.network.issue.com/image.png"
    dns_failure = urllib.error.URLError(
        reason="[Errno -2] Name or service not known"
    )
    urlopen_patch = mock.patch(
        "html2docx.image.urllib.request.urlopen",
        autospec=True,
        side_effect=dns_failure,
    )
    sleep_patch = mock.patch("html2docx.image.time.sleep", autospec=True)

    with urlopen_patch as url_mock, sleep_patch as time_mock:
        image_data = load_image(url)

    # The mocks keep their call records after the patches are undone.
    assert time_mock.mock_calls == [mock.call(1)] * 2
    assert url_mock.call_args_list == [mock.call(url)] * 3
    assert image_data.getbuffer() == broken_image_bytes
|
||
|
||
def test_404(image_server):
    """A nonexistent resource gives the placeholder after one request."""
    missing_url = image_server.base_url + "nonexistent"
    result = load_image(missing_url)
    assert result.getbuffer() == broken_image_bytes
    assert image_server.httpd.request_count == 1
|
||
|
||
def test_bad_server(bad_server):
    """A server that sends no response gives the placeholder, one request."""
    server_url = bad_server.base_url
    result = load_image(server_url)
    assert result.getbuffer() == broken_image_bytes
    assert bad_server.httpd.request_count == 1
|
||
|
||
def test_bad_content_length(bad_content_length_server):
    """A non-numeric Content-Length gives the placeholder, one request."""
    server = bad_content_length_server
    result = load_image(server.base_url)
    assert result.getbuffer() == broken_image_bytes
    assert server.httpd.request_count == 1
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
import pathlib | ||
|
||
# Resolved path of the directory containing this file; strict=True makes
# resolve() raise if the path does not exist.
TEST_DIR = pathlib.Path(__file__).parent.resolve(strict=True)
# Parent of TEST_DIR. NOTE(review): presumably the repository root, since the
# tests look up "html2docx" beneath it - confirm.
PROJECT_DIR = TEST_DIR.parent