ENH: Add support for pathlib.Path objects as input for PdfReader

py-pdf · Jun 12, 2022 · f9ac9c6 · f9ac9c6
1 parent 084745f
commit f9ac9c6
Show file tree

Hide file tree

Showing 2 changed files with 15 additions and 3 deletions.
diff --git a/PyPDF2/_reader.py b/PyPDF2/_reader.py
@@ -33,6 +33,7 @@
 import warnings
 from hashlib import md5
 from io import BytesIO
+from pathlib import Path
 from typing import (
     Any,
     Callable,
@@ -234,7 +235,7 @@ class PdfReader:
 
     def __init__(
         self,
-        stream: StrByteType,
+        stream: Union[StrByteType, Path],
         strict: bool = False,
         password: Union[None, str, bytes] = None,
     ) -> None:
@@ -251,7 +252,7 @@ def __init__(
                 "It may not be read correctly.",
                 PdfReadWarning,
             )
-        if isinstance(stream, str):
+        if isinstance(stream, (str, Path)):
             with open(stream, "rb") as fh:
                 stream = BytesIO(b_(fh.read()))
         self.read(stream)

diff --git a/tests/test_reader.py b/tests/test_reader.py
@@ -2,6 +2,7 @@
 import os
 import time
 from io import BytesIO
+from pathlib import Path
 
 import pytest
 
@@ -10,7 +11,11 @@
 from PyPDF2.constants import ImageAttributes as IA
 from PyPDF2.constants import PageAttributes as PG
 from PyPDF2.constants import Ressources as RES
-from PyPDF2.errors import PdfReadError, PdfReadWarning, STREAM_TRUNCATED_PREMATURELY
+from PyPDF2.errors import (
+    STREAM_TRUNCATED_PREMATURELY,
+    PdfReadError,
+    PdfReadWarning,
+)
 from PyPDF2.filters import _xobj_to_image
 from tests import get_pdf_from_url
 
@@ -691,3 +696,9 @@ def test_extract_text_hello_world():
         "Japanese:",
         "こんにちは世界",
     ]
+
+
+def test_read_path():  # PdfReader should accept a pathlib.Path as its stream argument
+    path = Path(os.path.join(RESOURCE_ROOT, "crazyones.pdf"))
+    reader = PdfReader(path)  # passes the Path object itself, not str(path)
+    assert len(reader.pages) == 1  # the test expects exactly one page in crazyones.pdf