Skip to content

Commit

Permalink
Remove mime type
Browse files: browse the repository at this point in the history
  • Loading branch information
MartinThoma committed Sep 24, 2022
1 parent 4b77a6a commit 21b54ae
Show file tree
Hide file tree
Showing 4 changed files with 18 additions and 34 deletions.
10 changes: 4 additions & 6 deletions PyPDF2/_page.py
Original file line number Diff line number Diff line change
Expand Up @@ -377,12 +377,10 @@ def images(self) -> List[File]:
x_object = self[PG.RESOURCES][RES.XOBJECT].get_object() # type: ignore
for obj in x_object:
if x_object[obj][IA.SUBTYPE] == "/Image":
mime_type, byte_stream = _xobj_to_image(x_object[obj])
if mime_type is not None:
filename = f"{obj[1:]}.{File._mime2extension(mime_type)}"
images_extracted.append(
File(name=filename, data=byte_stream, mime_type=mime_type)
)
extension, byte_stream = _xobj_to_image(x_object[obj])
if extension is not None:
filename = f"{obj[1:]}{extension}"
images_extracted.append(File(name=filename, data=byte_stream))
return images_extracted

@property
Expand Down
16 changes: 0 additions & 16 deletions PyPDF2/_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -420,19 +420,3 @@ def rename_kwargs( # type: ignore
class File:
name: str
data: bytes
mime_type: str

@property
def format(self) -> str:
return File._mime2extension(self.mime_type)

@staticmethod
def _mime2extension(mime_type: str) -> str:
mapping = {
"image/png": "png",
"image/jpeg": "jpg",
"image/x-jp2": "jp2",
"image/gif": "gif",
"image/tiff": "tiff",
}
return mapping.get(mime_type, "unknown")
20 changes: 10 additions & 10 deletions PyPDF2/filters.py
Original file line number Diff line number Diff line change
Expand Up @@ -562,7 +562,7 @@ def _xobj_to_image(x_object_obj: Dict[str, Any]) -> Tuple[Optional[str], bytes]:
It's unclear if PyPDF2 will keep this function here, hence it's private.
It might get removed at any point.
:return: Tuple[mime type, bytes]
:return: Tuple[extension, bytes]
"""
from PIL import Image

Expand All @@ -576,10 +576,10 @@ def _xobj_to_image(x_object_obj: Dict[str, Any]) -> Tuple[Optional[str], bytes]:
mode: Literal["RGB", "P"] = "RGB"
else:
mode = "P"
mime_type = None
extension = None
if SA.FILTER in x_object_obj:
if x_object_obj[SA.FILTER] == FT.FLATE_DECODE:
mime_type = "image/png"
extension = ".png" # mime_type = "image/png"
color_space = None
if "/ColorSpace" in x_object_obj:
color_space = x_object_obj["/ColorSpace"].get_object()
Expand Down Expand Up @@ -608,21 +608,21 @@ def _xobj_to_image(x_object_obj: Dict[str, Any]) -> Tuple[Optional[str], bytes]:
):
# I'm not sure if the mime types have any relationship to the filters
if x_object_obj[SA.FILTER] in [[FT.LZW_DECODE], [FT.CCITT_FAX_DECODE]]:
mime_type = "image/tiff"
extension = ".tiff" # mime_type = "image/tiff"
else:
mime_type = "image/png"
extension = ".png" # mime_type = "image/png"
data = b_(data)
elif x_object_obj[SA.FILTER] == FT.DCT_DECODE:
mime_type = "image/jpeg"
extension = ".jpg" # mime_type = "image/jpeg"
elif x_object_obj[SA.FILTER] == "/JPXDecode":
mime_type = "image/x-jp2"
extension = ".jp2" # mime_type = "image/x-jp2"
elif x_object_obj[SA.FILTER] == FT.CCITT_FAX_DECODE:
mime_type = "image/tiff"
extension = ".tiff" # mime_type = "image/tiff"
else:
mime_type = "image/png"
extension = ".png" # mime_type = "image/png"
img = Image.frombytes(mode, size, data)
img_byte_arr = BytesIO()
img.save(img_byte_arr, format="PNG")
data = img_byte_arr.getvalue()

return mime_type, data
return extension, data
6 changes: 4 additions & 2 deletions tests/test_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -201,8 +201,10 @@ def test_get_images(src, expected_images):
assert image.name == expected_image
with open(f"test-out-{src}-{image.name}", "wb") as fp:
fp.write(image.data)
assert image.format.upper() == Image.open(io.BytesIO(image.data)).format
assert image.mime_type == expected_mime
assert (
image.name.split(".")[-1].upper()
== Image.open(io.BytesIO(image.data)).format
)


@pytest.mark.parametrize(
Expand Down

0 comments on commit 21b54ae

Please sign in to comment.