From 80c3939ca0d9865a6df9c13229586695232ececb Mon Sep 17 00:00:00 2001 From: Stefan <96178532+stefan6419846@users.noreply.github.com> Date: Fri, 18 Oct 2024 19:21:51 +0200 Subject: [PATCH] ROB: Soft failure for flate encode image mode 1 with wrong LUT size (#2900) Closes #2889. --- pypdf/_xobj_image_helpers.py | 18 ++++--- tests/test_xobject_image_helpers.py | 78 +++++++++++++++-------------- 2 files changed, 52 insertions(+), 44 deletions(-) diff --git a/pypdf/_xobj_image_helpers.py b/pypdf/_xobj_image_helpers.py index 9c4dfdaa8..353b8181c 100644 --- a/pypdf/_xobj_image_helpers.py +++ b/pypdf/_xobj_image_helpers.py @@ -209,14 +209,18 @@ def _handle_flate( if img.mode == "1": # Two values ("high" and "low"). expected_count = 2 * nb - if len(lookup) != expected_count: - if len(lookup) < expected_count: - raise PdfReadError( - f"Not enough lookup values: Expected {expected_count}, got {len(lookup)}." + actual_count = len(lookup) + if actual_count != expected_count: + if actual_count < expected_count: + logger_warning( + f"Not enough lookup values: Expected {expected_count}, got {actual_count}.", + __name__ ) - if not check_if_whitespace_only(lookup[expected_count:]): - raise PdfReadError( - f"Too many lookup values: Expected {expected_count}, got {len(lookup)}." + lookup += bytes([0] * (expected_count - actual_count)) + elif not check_if_whitespace_only(lookup[expected_count:]): + logger_warning( + f"Too many lookup values: Expected {expected_count}, got {actual_count}.", + __name__ ) lookup = lookup[:expected_count] colors_arr = [lookup[:nb], lookup[nb:]] diff --git a/tests/test_xobject_image_helpers.py b/tests/test_xobject_image_helpers.py index 39b7131fc..6e1843585 100644 --- a/tests/test_xobject_image_helpers.py +++ b/tests/test_xobject_image_helpers.py @@ -29,7 +29,7 @@ def test_get_imagemode_recursion_depth(): reader.pages[0].images[0] -def test_handle_flate__image_mode_1(): +def test_handle_flate__image_mode_1(caplog): data = b"\x00\xe0\x00" lookup = DecodedStreamObject() expected_data = [ @@ -57,6 +57,7 @@ def test_handle_flate__image_mode_1(): obj_as_text="dummy", ) assert expected_data == list(result[0].getdata()) + assert not caplog.text # Trailing whitespace. lookup.set_data(b"\x42\x42\x42\x00\x13\x37 \x0a") @@ -71,48 +72,51 @@ def test_handle_flate__image_mode_1(): obj_as_text="dummy", ) assert expected_data == list(result[0].getdata()) + assert not caplog.text # Trailing non-whitespace character. lookup.set_data(b"\x42\x42\x42\x00\x13\x37\x12") - with pytest.raises( - PdfReadError, match=r"^Too many lookup values: Expected 6, got 7\.$" - ): - _handle_flate( - size=(3, 3), - data=data, - mode="1", - color_space=ArrayObject( - [ - NameObject("/Indexed"), - NameObject("/DeviceRGB"), - NumberObject(1), - lookup, - ] - ), - colors=2, - obj_as_text="dummy", - ) + result = _handle_flate( + size=(3, 3), + data=data, + mode="1", + color_space=ArrayObject( + [ + NameObject("/Indexed"), + NameObject("/DeviceRGB"), + NumberObject(1), + lookup, + ] + ), + colors=2, + obj_as_text="dummy", + ) + assert expected_data == list(result[0].getdata()) + assert "Too many lookup values: Expected 6, got 7." in caplog.text # Not enough lookup data. + # `\xe0` of the original input (the middle part) does not use `0x37 = 55` for the lookup + # here, but received a custom padding of `0`. lookup.set_data(b"\x42\x42\x42\x00\x13") - with pytest.raises( - PdfReadError, match=r"^Not enough lookup values: Expected 6, got 5\.$" - ): - _handle_flate( - size=(3, 3), - data=data, - mode="1", - color_space=ArrayObject( - [ - NameObject("/Indexed"), - NameObject("/DeviceRGB"), - NumberObject(1), - lookup, - ] - ), - colors=2, - obj_as_text="dummy", - ) + caplog.clear() + expected_short_data = [entry if entry[0] == 66 else (0, 19, 0) for entry in expected_data] + result = _handle_flate( + size=(3, 3), + data=data, + mode="1", + color_space=ArrayObject( + [ + NameObject("/Indexed"), + NameObject("/DeviceRGB"), + NumberObject(1), + lookup, + ] + ), + colors=2, + obj_as_text="dummy", + ) + assert expected_short_data == list(result[0].getdata()) + assert "Not enough lookup values: Expected 6, got 5." in caplog.text def test_extended_image_frombytes_zero_data():