From a2b7b55ad5472d080018306e173e6da3037a2445 Mon Sep 17 00:00:00 2001 From: JianzhengLuo Date: Fri, 15 Jul 2022 20:53:22 +0800 Subject: [PATCH 01/41] Add `with ... as ...` usage (#1108) --- PyPDF2/_merger.py | 21 +++++++++++++-------- PyPDF2/_writer.py | 30 ++++++++++++++++++++++++++++-- 2 files changed, 41 insertions(+), 10 deletions(-) diff --git a/PyPDF2/_merger.py b/PyPDF2/_merger.py index 319a47ccd..ccd01ab9a 100644 --- a/PyPDF2/_merger.py +++ b/PyPDF2/_merger.py @@ -78,17 +78,29 @@ class PdfMerger: :param bool strict: Determines whether user should be warned of all problems and also causes some correctable problems to be fatal. Defaults to ``False``. + :param fileobj: Output file. Can be a filename or any kind of + file-like object. """ - def __init__(self, strict: bool = False) -> None: + def __init__(self, fileobj: StrByteType = "", strict: bool = False) -> None: self.inputs: List[Tuple[Any, PdfReader, bool]] = [] self.pages: List[Any] = [] self.output: Optional[PdfWriter] = PdfWriter() self.bookmarks: OutlinesType = [] self.named_dests: List[Any] = [] self.id_count = 0 + self.fileobj = fileobj self.strict = strict + # There is nothing to do. + def __enter__(self): + return self + + # Write to the fileobj and close the merger. + def __exit__(self, *args): + self.write(self.fileobj) + self.close() + def merge( self, position: int, @@ -252,10 +264,6 @@ def write(self, fileobj: StrByteType) -> None: """ if self.output is None: raise RuntimeError(ERR_CLOSED_WRITER) - my_file = False - if isinstance(fileobj, str): - fileobj = FileIO(fileobj, "wb") - my_file = True # Add pages to the PdfWriter # The commented out line below was replaced with the two lines below it @@ -276,9 +284,6 @@ def write(self, fileobj: StrByteType) -> None: # Write the output to the file self.output.write(fileobj) - if my_file: - fileobj.close() - def close(self) -> None: """Shut all file descriptors (input and output) and clear all memory usage.""" self.pages = [] diff --git a/PyPDF2/_writer.py b/PyPDF2/_writer.py index 843549ab0..96dde4c4c 100644 --- a/PyPDF2/_writer.py +++ b/PyPDF2/_writer.py @@ -59,6 +59,7 @@ _get_max_pdf_version_header, b_, deprecate_with_replacement, + StrByteType ) from .constants import AnnotationDictionaryAttributes from .constants import CatalogAttributes as CA @@ -107,6 +108,8 @@ ZoomArgsType, ZoomArgType, ) +from io import FileIO + logger = logging.getLogger(__name__) @@ -121,7 +124,7 @@ class PdfWriter: class (typically :class:`PdfReader`). """ - def __init__(self) -> None: + def __init__(self, fileobj: StrByteType = "") -> None: self._header = b"%PDF-1.3" self._objects: List[Optional[PdfObject]] = [] # array of indirect objects self._idnum_hash: Dict[bytes, IndirectObject] = {} @@ -158,6 +161,19 @@ def __init__(self) -> None: ) self._root: Optional[IndirectObject] = None self._root_object = root + self.fileobj = fileobj + self.my_file = False + + # Nothing to do. + def __enter__(self): + return self + + # Write to the fileobj. + def __exit__(self, *args): + self.write(self.fileobj) + + if self.my_file: + self.fileobj.close() @property def pdf_header(self) -> bytes: @@ -763,7 +779,7 @@ def encrypt( self._encrypt = self._add_object(encrypt) self._encrypt_key = key - def write(self, stream: StreamType) -> None: + def write_stream(self, stream: StreamType) -> None: """ Write the collection of pages added to this object out as a PDF file. @@ -794,6 +810,16 @@ def write(self, stream: StreamType) -> None: self._write_trailer(stream) stream.write(b_(f"\nstartxref\n{xref_location}\n%%EOF\n")) # eof + def write(self, fileobj): + + if isinstance(fileobj, str) and fileobj: + self.fileobj = FileIO(fileobj, "wb") + self.my_file = True + else: + self.fileobj = fileobj + + self.write_stream(self.fileobj) + def _write_header(self, stream: StreamType) -> List[int]: object_positions = [] stream.write(self.pdf_header + b"\n") From 2a88bd52786380df5c1091a54b056f49f05075ce Mon Sep 17 00:00:00 2001 From: Jianzheng Luo Date: Sat, 16 Jul 2022 07:51:12 +0800 Subject: [PATCH 02/41] Update PyPDF2/_merger.py according to @MasterOdin's suggestions Co-authored-by: Matthew Peveler --- PyPDF2/_merger.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/PyPDF2/_merger.py b/PyPDF2/_merger.py index ccd01ab9a..0ee0e49bd 100644 --- a/PyPDF2/_merger.py +++ b/PyPDF2/_merger.py @@ -93,7 +93,7 @@ def __init__(self, fileobj: StrByteType = "", strict: bool = False) -> None: self.strict = strict # There is nothing to do. - def __enter__(self): + def __enter__(self) -> "PdfMerger": return self # Write to the fileobj and close the merger. From b6d0351a4ae2ea9cf48e0d5f196a783eaa698558 Mon Sep 17 00:00:00 2001 From: JianzhengLuo Date: Sat, 16 Jul 2022 07:59:16 +0800 Subject: [PATCH 03/41] Update PyPDF2/_merger.py according to @MasterOdin's suggestions --- PyPDF2/_merger.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/PyPDF2/_merger.py b/PyPDF2/_merger.py index 0ee0e49bd..e1cb75f69 100644 --- a/PyPDF2/_merger.py +++ b/PyPDF2/_merger.py @@ -75,11 +75,11 @@ class PdfMerger: See the functions :meth:`merge()` (or :meth:`append()`) and :meth:`write()` for usage information. + :param fileobj: Output file. Can be a filename or any kind of + file-like object. :param bool strict: Determines whether user should be warned of all problems and also causes some correctable problems to be fatal. Defaults to ``False``. - :param fileobj: Output file. Can be a filename or any kind of - file-like object. """ def __init__(self, fileobj: StrByteType = "", strict: bool = False) -> None: From 80813aa910025c5de8954020af11e9248f7ac298 Mon Sep 17 00:00:00 2001 From: JianzhengLuo Date: Sat, 16 Jul 2022 08:04:02 +0800 Subject: [PATCH 04/41] Update PyPDF2/_merger.py according to @MasterOdin's suggestions --- PyPDF2/_merger.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/PyPDF2/_merger.py b/PyPDF2/_merger.py index e1cb75f69..476a35cce 100644 --- a/PyPDF2/_merger.py +++ b/PyPDF2/_merger.py @@ -26,7 +26,7 @@ # POSSIBILITY OF SUCH DAMAGE. from io import BytesIO, FileIO, IOBase -from typing import Any, Dict, Iterable, List, Optional, Tuple, Union, cast +from typing import Any, Dict, Iterable, List, Optional, Tuple, Union, cast, Type, TracebackType from ._encryption import Encryption from ._page import PageObject @@ -97,7 +97,8 @@ def __enter__(self) -> "PdfMerger": return self # Write to the fileobj and close the merger. - def __exit__(self, *args): + def __exit__(self, exc_type: Optional[Type[BaseException]], exc: Optional[BaseException], traceback: + Optional[TracebackType]): self.write(self.fileobj) self.close() From e6ec1f6c847b7fdfab18e0c28a9c27ac5efa894e Mon Sep 17 00:00:00 2001 From: JianzhengLuo Date: Sat, 16 Jul 2022 08:15:09 +0800 Subject: [PATCH 05/41] Update PyPDF2/_merger.py according to @MasterOdin's suggestions --- PyPDF2/_merger.py | 4 ++-- PyPDF2/_writer.py | 5 ++++- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/PyPDF2/_merger.py b/PyPDF2/_merger.py index 476a35cce..43f22fd75 100644 --- a/PyPDF2/_merger.py +++ b/PyPDF2/_merger.py @@ -97,8 +97,8 @@ def __enter__(self) -> "PdfMerger": return self # Write to the fileobj and close the merger. - def __exit__(self, exc_type: Optional[Type[BaseException]], exc: Optional[BaseException], traceback: - Optional[TracebackType]): + def __exit__(self, exc_type: Optional[Type[BaseException]], exc: Optional[BaseException], + traceback: Optional[TracebackType]): self.write(self.fileobj) self.close() diff --git a/PyPDF2/_writer.py b/PyPDF2/_writer.py index 96dde4c4c..88d416a6b 100644 --- a/PyPDF2/_writer.py +++ b/PyPDF2/_writer.py @@ -47,6 +47,8 @@ Tuple, Union, cast, + Type, + TracebackType ) from PyPDF2.errors import PdfReadWarning @@ -169,7 +171,8 @@ def __enter__(self): return self # Write to the fileobj. - def __exit__(self, *args): + def __exit__(self, exc_type: Optional[Type[BaseException]], exc: Optional[BaseException], + traceback: Optional[TracebackType]): self.write(self.fileobj) if self.my_file: From 386be5b398dce6cf10bc99534f2271ae325fa2c7 Mon Sep 17 00:00:00 2001 From: JianzhengLuo Date: Sat, 16 Jul 2022 08:17:23 +0800 Subject: [PATCH 06/41] Update PyPDF2/_merger.py according to @MasterOdin's suggestions --- PyPDF2/_writer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/PyPDF2/_writer.py b/PyPDF2/_writer.py index 88d416a6b..43a14d131 100644 --- a/PyPDF2/_writer.py +++ b/PyPDF2/_writer.py @@ -813,7 +813,7 @@ def write_stream(self, stream: StreamType) -> None: self._write_trailer(stream) stream.write(b_(f"\nstartxref\n{xref_location}\n%%EOF\n")) # eof - def write(self, fileobj): + def write(self, fileobj: StrByteType) -> None: if isinstance(fileobj, str) and fileobj: self.fileobj = FileIO(fileobj, "wb") From 31786bba18190fbfe983b2410f430a68f783c19e Mon Sep 17 00:00:00 2001 From: JianzhengLuo Date: Sat, 16 Jul 2022 08:40:44 +0800 Subject: [PATCH 07/41] Sorry, I forgot to run before committing, so didn't notice that `TracebackType` is from `typing` but from `types`. --- PyPDF2/_merger.py | 3 ++- PyPDF2/_writer.py | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/PyPDF2/_merger.py b/PyPDF2/_merger.py index 43f22fd75..21b0cbab1 100644 --- a/PyPDF2/_merger.py +++ b/PyPDF2/_merger.py @@ -26,7 +26,7 @@ # POSSIBILITY OF SUCH DAMAGE. from io import BytesIO, FileIO, IOBase -from typing import Any, Dict, Iterable, List, Optional, Tuple, Union, cast, Type, TracebackType +from typing import Any, Dict, Iterable, List, Optional, Tuple, Union, cast, Type from ._encryption import Encryption from ._page import PageObject @@ -51,6 +51,7 @@ ) from .pagerange import PageRange, PageRangeSpec from .types import FitType, LayoutType, OutlinesType, PagemodeType, ZoomArgType +from types import TracebackType ERR_CLOSED_WRITER = "close() was called and thus the writer cannot be used anymore" diff --git a/PyPDF2/_writer.py b/PyPDF2/_writer.py index 43a14d131..cc48ff2f2 100644 --- a/PyPDF2/_writer.py +++ b/PyPDF2/_writer.py @@ -47,8 +47,7 @@ Tuple, Union, cast, - Type, - TracebackType + Type ) from PyPDF2.errors import PdfReadWarning @@ -111,6 +110,7 @@ ZoomArgType, ) from io import FileIO +from types import TracebackType logger = logging.getLogger(__name__) From ae1bec010b9ff8a1300c243a061e177abae584e7 Mon Sep 17 00:00:00 2001 From: JianzhengLuo Date: Sat, 16 Jul 2022 09:28:55 +0800 Subject: [PATCH 08/41] Modify the wrong closing place that cause. --- PyPDF2/_writer.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/PyPDF2/_writer.py b/PyPDF2/_writer.py index cc48ff2f2..355132dde 100644 --- a/PyPDF2/_writer.py +++ b/PyPDF2/_writer.py @@ -164,7 +164,6 @@ def __init__(self, fileobj: StrByteType = "") -> None: self._root: Optional[IndirectObject] = None self._root_object = root self.fileobj = fileobj - self.my_file = False # Nothing to do. def __enter__(self): @@ -175,9 +174,6 @@ def __exit__(self, exc_type: Optional[Type[BaseException]], exc: Optional[BaseEx traceback: Optional[TracebackType]): self.write(self.fileobj) - if self.my_file: - self.fileobj.close() - @property def pdf_header(self) -> bytes: """ @@ -814,14 +810,18 @@ def write_stream(self, stream: StreamType) -> None: stream.write(b_(f"\nstartxref\n{xref_location}\n%%EOF\n")) # eof def write(self, fileobj: StrByteType) -> None: + my_file = False if isinstance(fileobj, str) and fileobj: - self.fileobj = FileIO(fileobj, "wb") - self.my_file = True + fileobj = FileIO(fileobj, "wb") + my_file = True else: - self.fileobj = fileobj + ... + + self.write_stream(fileobj) - self.write_stream(self.fileobj) + if my_file: + fileobj.close() def _write_header(self, stream: StreamType) -> List[int]: object_positions = [] From b695113ea65b2ed12d185cc0c4343411118065f9 Mon Sep 17 00:00:00 2001 From: Jianzheng Luo Date: Mon, 18 Jul 2022 08:30:38 +0800 Subject: [PATCH 09/41] Modify PyPDF2/_writer.py according to @MasterOdin's suggestions Co-authored-by: Matthew Peveler --- PyPDF2/_writer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/PyPDF2/_writer.py b/PyPDF2/_writer.py index 355132dde..55d3fe0f1 100644 --- a/PyPDF2/_writer.py +++ b/PyPDF2/_writer.py @@ -816,7 +816,7 @@ def write(self, fileobj: StrByteType) -> None: fileobj = FileIO(fileobj, "wb") my_file = True else: - ... + pass self.write_stream(fileobj) From 58797c2f6a4ce8d6a7984bb133a6fcd6a3450590 Mon Sep 17 00:00:00 2001 From: Jianzheng Luo Date: Mon, 18 Jul 2022 09:06:03 +0800 Subject: [PATCH 10/41] Modify PyPDF2/_writer.py according to @MasterOdin's suggestions Co-authored-by: Matthew Peveler --- PyPDF2/_writer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/PyPDF2/_writer.py b/PyPDF2/_writer.py index 55d3fe0f1..6ff7a56f1 100644 --- a/PyPDF2/_writer.py +++ b/PyPDF2/_writer.py @@ -166,7 +166,7 @@ def __init__(self, fileobj: StrByteType = "") -> None: self.fileobj = fileobj # Nothing to do. - def __enter__(self): + def __enter__(self) -> "PdfWriter": return self # Write to the fileobj. From 1caa9ecb9389ac02186aa3af6f8c219e9c57ce41 Mon Sep 17 00:00:00 2001 From: Jianzheng Luo Date: Mon, 18 Jul 2022 09:06:26 +0800 Subject: [PATCH 11/41] Modify PyPDF2/_writer.py according to @MasterOdin's suggestions Co-authored-by: Matthew Peveler --- PyPDF2/_writer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/PyPDF2/_writer.py b/PyPDF2/_writer.py index 6ff7a56f1..a1312678f 100644 --- a/PyPDF2/_writer.py +++ b/PyPDF2/_writer.py @@ -171,7 +171,7 @@ def __enter__(self) -> "PdfWriter": # Write to the fileobj. def __exit__(self, exc_type: Optional[Type[BaseException]], exc: Optional[BaseException], - traceback: Optional[TracebackType]): + traceback: Optional[TracebackType]) -> None: self.write(self.fileobj) @property From 4fbe3cc679b573069d93fcc59b9becc65372f1d7 Mon Sep 17 00:00:00 2001 From: Jianzheng Luo Date: Mon, 18 Jul 2022 09:06:40 +0800 Subject: [PATCH 12/41] Modify PyPDF2/_writer.py according to @MasterOdin's suggestions Co-authored-by: Matthew Peveler --- PyPDF2/_merger.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/PyPDF2/_merger.py b/PyPDF2/_merger.py index 21b0cbab1..44f623ca7 100644 --- a/PyPDF2/_merger.py +++ b/PyPDF2/_merger.py @@ -99,7 +99,7 @@ def __enter__(self) -> "PdfMerger": # Write to the fileobj and close the merger. def __exit__(self, exc_type: Optional[Type[BaseException]], exc: Optional[BaseException], - traceback: Optional[TracebackType]): + traceback: Optional[TracebackType]) -> None: self.write(self.fileobj) self.close() From d598e8ab0de9beeaed1c7043095e8fba7aeb0685 Mon Sep 17 00:00:00 2001 From: JianzhengLuo Date: Mon, 18 Jul 2022 09:11:18 +0800 Subject: [PATCH 13/41] Modify PyPDF2/_writer.py according to @MasterOdin's suggestions --- PyPDF2/_writer.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/PyPDF2/_writer.py b/PyPDF2/_writer.py index 55d3fe0f1..76b5fcd75 100644 --- a/PyPDF2/_writer.py +++ b/PyPDF2/_writer.py @@ -812,7 +812,10 @@ def write_stream(self, stream: StreamType) -> None: def write(self, fileobj: StrByteType) -> None: my_file = False - if isinstance(fileobj, str) and fileobj: + if fileobj == "": + raise ValueError(f"Output(fileobj={fileobj}) is empty.") + + if isinstance(fileobj, str): fileobj = FileIO(fileobj, "wb") my_file = True else: @@ -1630,7 +1633,7 @@ def _get_page_layout(self) -> Optional[LayoutType]: def getPageLayout(self) -> Optional[LayoutType]: # pragma: no cover """ - .. deprecated:: 1.28.0 + .. deprecated:: 1.28.0raise Use :py:attr:`page_layout` instead. """ From 519dad1246203d0bcf2fda7d1c5a57b37b9c6019 Mon Sep 17 00:00:00 2001 From: JianzhengLuo Date: Thu, 21 Jul 2022 09:26:10 +0800 Subject: [PATCH 14/41] Fix accident --- PyPDF2/_writer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/PyPDF2/_writer.py b/PyPDF2/_writer.py index 85ecae30e..d223027d1 100644 --- a/PyPDF2/_writer.py +++ b/PyPDF2/_writer.py @@ -1633,7 +1633,7 @@ def _get_page_layout(self) -> Optional[LayoutType]: def getPageLayout(self) -> Optional[LayoutType]: # pragma: no cover """ - .. deprecated:: 1.28.0raise + .. deprecated:: 1.28.0 Use :py:attr:`page_layout` instead. """ From 90af68a30c90c39a3a6104f5ba99d915e3691dff Mon Sep 17 00:00:00 2001 From: JianzhengLuo Date: Thu, 21 Jul 2022 11:06:33 +0800 Subject: [PATCH 15/41] Fix error raising while using half traditional usage --- PyPDF2/_merger.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/PyPDF2/_merger.py b/PyPDF2/_merger.py index 44f623ca7..4e2876395 100644 --- a/PyPDF2/_merger.py +++ b/PyPDF2/_merger.py @@ -100,7 +100,8 @@ def __enter__(self) -> "PdfMerger": # Write to the fileobj and close the merger. def __exit__(self, exc_type: Optional[Type[BaseException]], exc: Optional[BaseException], traceback: Optional[TracebackType]) -> None: - self.write(self.fileobj) + if self.fileobj: + self.write(self.fileobj) self.close() def merge( From 0f6765805a8f8433974b813c78d799ebcc057c4a Mon Sep 17 00:00:00 2001 From: JianzhengLuo Date: Thu, 21 Jul 2022 11:17:20 +0800 Subject: [PATCH 16/41] Add a unit test (Problems still exist, please help.) --- tests/test_merger.py | 40 ++++++++++++++++++++++++------ tests/test_writer.py | 58 +++++++++++++++++++++++++++++++++++--------- 2 files changed, 80 insertions(+), 18 deletions(-) diff --git a/tests/test_merger.py b/tests/test_merger.py index 2a9d9e4e6..3a7fd028c 100644 --- a/tests/test_merger.py +++ b/tests/test_merger.py @@ -17,13 +17,13 @@ sys.path.append(PROJECT_ROOT) -def test_merge(): +def merger_operate(merger): pdf_path = os.path.join(RESOURCE_ROOT, "crazyones.pdf") outline = os.path.join(RESOURCE_ROOT, "pdflatex-outline.pdf") pdf_forms = os.path.join(RESOURCE_ROOT, "pdflatex-forms.pdf") pdf_pw = os.path.join(RESOURCE_ROOT, "libreoffice-writer-password.pdf") - merger = PyPDF2.PdfMerger() + # merger = PyPDF2.PdfMerger() # string path: merger.append(pdf_path) @@ -78,10 +78,6 @@ def test_merge(): merger.set_page_layout("/SinglePage") merger.set_page_mode("/UseThumbs") - tmp_path = "dont_commit_merged.pdf" - merger.write(tmp_path) - merger.close() - # Check if bookmarks are correct reader = PyPDF2.PdfReader(tmp_path) assert [ @@ -102,7 +98,37 @@ def test_merge(): # TODO: There seem to be no destinations for those links? - # Clean up + +tmp_path = "dont_commit_merged.pdf" + + +def test_writer_operations_by_totally_traditional_usage(): + merger = PdfMerger() + + merger_operate(merger) + + merger.write(tmp_path) + merger.close() + + # cleanup + os.remove(tmp_path) + + +def test_writer_operations_by_semi_traditional_usage(): + with PdfMerger() as merger: + merger_operate(merger) + + merger.write(tmp_path) + + # cleanup + os.remove(tmp_path) + + +def test_writer_operation_by_totally_new_usage(): + with PdfMerger(tmp_path) as merger: + merger_operate(merger) + + # cleanup os.remove(tmp_path) diff --git a/tests/test_writer.py b/tests/test_writer.py index f2d3a56c6..c1b5ebdb0 100644 --- a/tests/test_writer.py +++ b/tests/test_writer.py @@ -24,12 +24,9 @@ def test_writer_clone(): assert len(writer.pages) == 4 -def test_writer_operations(): +def writer_operate(writer): """ - This test just checks if the operation throws an exception. - - This should be done way more thoroughly: It should be checked if the - output is as expected. + To test the writer that initialized by each of the four usages. """ pdf_path = os.path.join(RESOURCE_ROOT, "crazyones.pdf") pdf_outline_path = os.path.join(RESOURCE_ROOT, "pdflatex-outline.pdf") @@ -37,7 +34,6 @@ def test_writer_operations(): reader = PdfReader(pdf_path) reader_outline = PdfReader(pdf_outline_path) - writer = PdfWriter() page = reader.pages[0] with pytest.raises(PageSizeNotDefinedError) as exc: writer.add_blank_page() @@ -80,17 +76,57 @@ def test_writer_operations(): writer.add_attachment("foobar.gif", b"foobarcontent") - # finally, write "output" to PyPDF2-output.pdf - tmp_path = "dont_commit_writer.pdf" - with open(tmp_path, "wb") as output_stream: - writer.write(output_stream) - # Check that every key in _idnum_hash is correct objects_hash = [o.hash_value() for o in writer._objects] for k, v in writer._idnum_hash.items(): assert v.pdf == writer assert k in objects_hash, "Missing %s" % v + +tmp_path = "dont_commit_writer.pdf" + + +def test_writer_operations_by_totally_traditional_usage(): + writer = PdfWriter() + + writer_operate(writer) + + # finally, write "output" to PyPDF2-output.pdf + with open(tmp_path, "wb") as output_stream: + writer.write(output_stream) + + # cleanup + os.remove(tmp_path) + + +def test_writer_operations_by_semi_traditional_usage(): + with PdfWriter() as writer: + writer_operate(writer) + + # finally, write "output" to PyPDF2-output.pdf + with open(tmp_path, "wb") as output_stream: + writer.write(output_stream) + + # cleanup + os.remove(tmp_path) + + +def test_writer_operations_by_semi_new_traditional_usage(): + with PdfWriter() as writer: + writer_operate(writer) + + # finally, write "output" to PyPDF2-output.pdf + writer.write(tmp_path) + + # cleanup + os.remove(tmp_path) + + +def test_writer_operation_by_totally_new_usage(): + # This includes write "output" to PyPDF2-output.pdf + with PdfWriter(tmp_path) as writer: + writer_operate(writer) + # cleanup os.remove(tmp_path) From a6f973e20e224519520c62363cb330a4f85aee12 Mon Sep 17 00:00:00 2001 From: Jianzheng Luo Date: Thu, 21 Jul 2022 19:13:43 +0800 Subject: [PATCH 17/41] Update PyPDF2/_merger.py according to @MartinThoma's suggestions Co-authored-by: Martin Thoma --- PyPDF2/_merger.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/PyPDF2/_merger.py b/PyPDF2/_merger.py index 4e2876395..7db906695 100644 --- a/PyPDF2/_merger.py +++ b/PyPDF2/_merger.py @@ -97,9 +97,9 @@ def __init__(self, fileobj: StrByteType = "", strict: bool = False) -> None: def __enter__(self) -> "PdfMerger": return self - # Write to the fileobj and close the merger. def __exit__(self, exc_type: Optional[Type[BaseException]], exc: Optional[BaseException], traceback: Optional[TracebackType]) -> None: + """Write to the fileobj and close the merger.""" if self.fileobj: self.write(self.fileobj) self.close() From 580ad8c9fdcfbd7220764e436f829e13030b6d50 Mon Sep 17 00:00:00 2001 From: JianzhengLuo Date: Thu, 21 Jul 2022 19:17:34 +0800 Subject: [PATCH 18/41] Modify PyPDF2/_merger.py according to flake8 --- PyPDF2/_merger.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/PyPDF2/_merger.py b/PyPDF2/_merger.py index 7db906695..a48a99cc7 100644 --- a/PyPDF2/_merger.py +++ b/PyPDF2/_merger.py @@ -99,7 +99,7 @@ def __enter__(self) -> "PdfMerger": def __exit__(self, exc_type: Optional[Type[BaseException]], exc: Optional[BaseException], traceback: Optional[TracebackType]) -> None: - """Write to the fileobj and close the merger.""" + """Write to the fileobj and close the merger.""" if self.fileobj: self.write(self.fileobj) self.close() From bf264bca1c478f77c87f565739057df81ce13565 Mon Sep 17 00:00:00 2001 From: JianzhengLuo Date: Thu, 21 Jul 2022 19:32:25 +0800 Subject: [PATCH 19/41] Modify to compatible with the existing usage --- PyPDF2/_merger.py | 5 ++++- PyPDF2/_writer.py | 3 +-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/PyPDF2/_merger.py b/PyPDF2/_merger.py index a48a99cc7..f50a8c249 100644 --- a/PyPDF2/_merger.py +++ b/PyPDF2/_merger.py @@ -285,7 +285,10 @@ def write(self, fileobj: StrByteType) -> None: self._write_bookmarks() # Write the output to the file - self.output.write(fileobj) + my_file = self.output.write(fileobj) + + if my_file: + fileobj.close() def close(self) -> None: """Shut all file descriptors (input and output) and clear all memory usage.""" diff --git a/PyPDF2/_writer.py b/PyPDF2/_writer.py index d5a96b6de..347ab1498 100644 --- a/PyPDF2/_writer.py +++ b/PyPDF2/_writer.py @@ -823,8 +823,7 @@ def write(self, fileobj: StrByteType) -> None: self.write_stream(fileobj) - if my_file: - fileobj.close() + return my_file def _write_header(self, stream: StreamType) -> List[int]: object_positions = [] From c705300bf1d0f832403d7a78672abdd87aa40218 Mon Sep 17 00:00:00 2001 From: JianzhengLuo Date: Thu, 21 Jul 2022 19:48:57 +0800 Subject: [PATCH 20/41] Modify to compatible with the with .. as ... usage --- PyPDF2/_writer.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/PyPDF2/_writer.py b/PyPDF2/_writer.py index 347ab1498..a3be881de 100644 --- a/PyPDF2/_writer.py +++ b/PyPDF2/_writer.py @@ -164,9 +164,11 @@ def __init__(self, fileobj: StrByteType = "") -> None: self._root: Optional[IndirectObject] = None self._root_object = root self.fileobj = fileobj + self.with_as_usage = False - # Nothing to do. + # Let it know whether it is initialized by with ... as ... usage or not def __enter__(self) -> "PdfWriter": + self.with_as_usage = True return self # Write to the fileobj. @@ -823,6 +825,9 @@ def write(self, fileobj: StrByteType) -> None: self.write_stream(fileobj) + if self.with_as_usage: + fileobj.close() + return my_file def _write_header(self, stream: StreamType) -> List[int]: From 2940589924da978bf60bcca95de2c127d639cbc2 Mon Sep 17 00:00:00 2001 From: JianzhengLuo Date: Thu, 21 Jul 2022 19:56:11 +0800 Subject: [PATCH 21/41] Removed a meaningless annotation --- tests/test_merger.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/tests/test_merger.py b/tests/test_merger.py index 3a7fd028c..48b3a75ac 100644 --- a/tests/test_merger.py +++ b/tests/test_merger.py @@ -23,8 +23,6 @@ def merger_operate(merger): pdf_forms = os.path.join(RESOURCE_ROOT, "pdflatex-forms.pdf") pdf_pw = os.path.join(RESOURCE_ROOT, "libreoffice-writer-password.pdf") - # merger = PyPDF2.PdfMerger() - # string path: merger.append(pdf_path) merger.append(outline) From cda38ac04d6c66f59345dae1eb9dbcc32c48643a Mon Sep 17 00:00:00 2001 From: JianzhengLuo Date: Thu, 21 Jul 2022 19:57:39 +0800 Subject: [PATCH 22/41] Fixed wrong test function names --- tests/test_merger.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/test_merger.py b/tests/test_merger.py index 48b3a75ac..0807e903b 100644 --- a/tests/test_merger.py +++ b/tests/test_merger.py @@ -100,7 +100,7 @@ def merger_operate(merger): tmp_path = "dont_commit_merged.pdf" -def test_writer_operations_by_totally_traditional_usage(): +def test_merger_operations_by_totally_traditional_usage(): merger = PdfMerger() merger_operate(merger) @@ -112,7 +112,7 @@ def test_writer_operations_by_totally_traditional_usage(): os.remove(tmp_path) -def test_writer_operations_by_semi_traditional_usage(): +def test_merger_operations_by_semi_traditional_usage(): with PdfMerger() as merger: merger_operate(merger) @@ -122,7 +122,7 @@ def test_writer_operations_by_semi_traditional_usage(): os.remove(tmp_path) -def test_writer_operation_by_totally_new_usage(): +def test_merger_operation_by_totally_new_usage(): with PdfMerger(tmp_path) as merger: merger_operate(merger) From 0b181980a0f3fe842ec5443021104c0afcb56496 Mon Sep 17 00:00:00 2001 From: JianzhengLuo Date: Fri, 22 Jul 2022 09:36:46 +0800 Subject: [PATCH 23/41] Fixed those I can fix only. --- PyPDF2/_merger.py | 2 +- PyPDF2/_writer.py | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/PyPDF2/_merger.py b/PyPDF2/_merger.py index f50a8c249..0adccf514 100644 --- a/PyPDF2/_merger.py +++ b/PyPDF2/_merger.py @@ -285,7 +285,7 @@ def write(self, fileobj: StrByteType) -> None: self._write_bookmarks() # Write the output to the file - my_file = self.output.write(fileobj) + my_file, fileobj = self.output.write(fileobj) if my_file: fileobj.close() diff --git a/PyPDF2/_writer.py b/PyPDF2/_writer.py index a3be881de..d494b0d5f 100644 --- a/PyPDF2/_writer.py +++ b/PyPDF2/_writer.py @@ -174,7 +174,8 @@ def __enter__(self) -> "PdfWriter": # Write to the fileobj. def __exit__(self, exc_type: Optional[Type[BaseException]], exc: Optional[BaseException], traceback: Optional[TracebackType]) -> None: - self.write(self.fileobj) + if self.fileobj: + self.write(self.fileobj) @property def pdf_header(self) -> bytes: @@ -828,7 +829,7 @@ def write(self, fileobj: StrByteType) -> None: if self.with_as_usage: fileobj.close() - return my_file + return my_file, fileobj def _write_header(self, stream: StreamType) -> List[int]: object_positions = [] From 62f970f1d4df2fddfeb985d1a782230c6f7c0287 Mon Sep 17 00:00:00 2001 From: Jianzheng Luo Date: Fri, 22 Jul 2022 19:29:41 +0800 Subject: [PATCH 24/41] Removed useless `else` section Co-authored-by: Martin Thoma --- PyPDF2/_writer.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/PyPDF2/_writer.py b/PyPDF2/_writer.py index d494b0d5f..c62e6c5d0 100644 --- a/PyPDF2/_writer.py +++ b/PyPDF2/_writer.py @@ -821,8 +821,6 @@ def write(self, fileobj: StrByteType) -> None: if isinstance(fileobj, str): fileobj = FileIO(fileobj, "wb") my_file = True - else: - pass self.write_stream(fileobj) From d85c91b540efbd375507ca9b8fbb777b6bbc0921 Mon Sep 17 00:00:00 2001 From: Jianzheng Luo Date: Fri, 22 Jul 2022 19:34:45 +0800 Subject: [PATCH 25/41] Renamed argument name `fileobj` back to `stream` to keep the existing workflow Co-authored-by: Martin Thoma --- PyPDF2/_writer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/PyPDF2/_writer.py b/PyPDF2/_writer.py index c62e6c5d0..36da8360c 100644 --- a/PyPDF2/_writer.py +++ b/PyPDF2/_writer.py @@ -812,7 +812,7 @@ def write_stream(self, stream: StreamType) -> None: self._write_trailer(stream) stream.write(b_(f"\nstartxref\n{xref_location}\n%%EOF\n")) # eof - def write(self, fileobj: StrByteType) -> None: + def write(self, stream: StrByteType) -> None: my_file = False if fileobj == "": From 5cd3f3e670071ac3a7d2d49698bc605dda1def79 Mon Sep 17 00:00:00 2001 From: JianzhengLuo Date: Fri, 22 Jul 2022 19:55:06 +0800 Subject: [PATCH 26/41] Modified annoation for `PdfWriter().write()` and `PdfWriter().write_stream()` --- PyPDF2/_writer.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/PyPDF2/_writer.py b/PyPDF2/_writer.py index 36da8360c..1782fe810 100644 --- a/PyPDF2/_writer.py +++ b/PyPDF2/_writer.py @@ -782,12 +782,6 @@ def encrypt( self._encrypt_key = key def write_stream(self, stream: StreamType) -> None: - """ - Write the collection of pages added to this object out as a PDF file. - - :param stream: An object to write the file to. The object must support - the write method and the tell method, similar to a file object. - """ if hasattr(stream, "mode") and "b" not in stream.mode: warnings.warn( f"File <{stream.name}> to write to is not in binary mode. " # type: ignore @@ -813,6 +807,14 @@ def write_stream(self, stream: StreamType) -> None: stream.write(b_(f"\nstartxref\n{xref_location}\n%%EOF\n")) # eof def write(self, stream: StrByteType) -> None: + """ + Write the collection of pages added to this object out as a PDF file. + + :param stream: An object to write the file to. The object can support + the write method and the tell method, similar to a file object, or + be a file path, just like the fileobj, just named it stream to keep + existing workflow. + """ my_file = False if fileobj == "": From a7346ef13ee2514107d6cab26b52a224d0036d0b Mon Sep 17 00:00:00 2001 From: Jianzheng Luo Date: Sat, 23 Jul 2022 15:04:09 +0800 Subject: [PATCH 27/41] Switched `fileobj` and `strict` in initializing `PdfWriter()` to keep the existing workflow Co-authored-by: Matthew Peveler --- PyPDF2/_merger.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/PyPDF2/_merger.py b/PyPDF2/_merger.py index 0adccf514..dd95b43da 100644 --- a/PyPDF2/_merger.py +++ b/PyPDF2/_merger.py @@ -83,7 +83,7 @@ class PdfMerger: Defaults to ``False``. """ - def __init__(self, fileobj: StrByteType = "", strict: bool = False) -> None: + def __init__(self, strict: bool = False, fileobj: StrByteType = "") -> None: self.inputs: List[Tuple[Any, PdfReader, bool]] = [] self.pages: List[Any] = [] self.output: Optional[PdfWriter] = PdfWriter() From d3e62ee81962c70d6bd5e5346ceb241cce859638 Mon Sep 17 00:00:00 2001 From: JianzhengLuo Date: Sat, 23 Jul 2022 15:06:53 +0800 Subject: [PATCH 28/41] Modified annoation to make the it match the arguments --- PyPDF2/_merger.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/PyPDF2/_merger.py b/PyPDF2/_merger.py index dd95b43da..8841686de 100644 --- a/PyPDF2/_merger.py +++ b/PyPDF2/_merger.py @@ -76,11 +76,11 @@ class PdfMerger: See the functions :meth:`merge()` (or :meth:`append()`) and :meth:`write()` for usage information. - :param fileobj: Output file. Can be a filename or any kind of - file-like object. :param bool strict: Determines whether user should be warned of all problems and also causes some correctable problems to be fatal. Defaults to ``False``. + :param fileobj: Output file. Can be a filename or any kind of + file-like object. """ def __init__(self, strict: bool = False, fileobj: StrByteType = "") -> None: From 59be94d69386ce7b907c11ec1b2ea7b756ab32d3 Mon Sep 17 00:00:00 2001 From: JianzhengLuo Date: Sun, 24 Jul 2022 15:40:55 +0800 Subject: [PATCH 29/41] Modified undefined name `fileobj` --- PyPDF2/_writer.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/PyPDF2/_writer.py b/PyPDF2/_writer.py index 1782fe810..08d08a4ee 100644 --- a/PyPDF2/_writer.py +++ b/PyPDF2/_writer.py @@ -817,19 +817,19 @@ def write(self, stream: StrByteType) -> None: """ my_file = False - if fileobj == "": - raise ValueError(f"Output(fileobj={fileobj}) is empty.") + if stream == "": + raise ValueError(f"Output(stream={stream}) is empty.") - if isinstance(fileobj, str): - fileobj = FileIO(fileobj, "wb") + if isinstance(stream, str): + stream = FileIO(stream, "wb") my_file = True - self.write_stream(fileobj) + self.write_stream(stream) if self.with_as_usage: - fileobj.close() + stream.close() - return my_file, fileobj + return my_file, stream def _write_header(self, stream: StreamType) -> List[int]: object_positions = [] From da7a2396cf4ae252aabb018f7169c5a655c34921 Mon Sep 17 00:00:00 2001 From: Martin Thoma Date: Mon, 1 Aug 2022 18:49:41 +0200 Subject: [PATCH 30/41] Fixes --- PyPDF2/_merger.py | 25 +++++++++++++++++++------ PyPDF2/_writer.py | 17 ++++++++++------- 2 files changed, 29 insertions(+), 13 deletions(-) diff --git a/PyPDF2/_merger.py b/PyPDF2/_merger.py index 0dfe6f44f..efacbaeb4 100644 --- a/PyPDF2/_merger.py +++ b/PyPDF2/_merger.py @@ -27,7 +27,18 @@ from io import BytesIO, FileIO, IOBase from pathlib import Path -from typing import Any, Dict, Iterable, List, Optional, Tuple, Union, cast, Type +from types import TracebackType +from typing import ( + Any, + Dict, + Iterable, + List, + Optional, + Tuple, + Type, + Union, + cast, +) from ._encryption import Encryption from ._page import PageObject @@ -57,8 +68,6 @@ ) from .pagerange import PageRange, PageRangeSpec from .types import FitType, LayoutType, OutlineType, PagemodeType, ZoomArgType -from types import TracebackType - ERR_CLOSED_WRITER = "close() was called and thus the writer cannot be used anymore" @@ -90,7 +99,7 @@ class PdfMerger: file-like object. """ - @deprecate_bookmark(bookmarks="outline") + @deprecate_bookmark(bookmarks="outline") def __init__(self, strict: bool = False, fileobj: StrByteType = "") -> None: self.inputs: List[Tuple[Any, PdfReader, bool]] = [] self.pages: List[Any] = [] @@ -105,8 +114,12 @@ def __init__(self, strict: bool = False, fileobj: StrByteType = "") -> None: def __enter__(self) -> "PdfMerger": return self - def __exit__(self, exc_type: Optional[Type[BaseException]], exc: Optional[BaseException], - traceback: Optional[TracebackType]) -> None: + def __exit__( + self, + exc_type: Optional[Type[BaseException]], + exc: Optional[BaseException], + traceback: Optional[TracebackType], + ) -> None: """Write to the fileobj and close the merger.""" if self.fileobj: self.write(self.fileobj) diff --git a/PyPDF2/_writer.py b/PyPDF2/_writer.py index 0b8cc8709..fbcdb75ee 100644 --- a/PyPDF2/_writer.py +++ b/PyPDF2/_writer.py @@ -36,6 +36,8 @@ import time import uuid from hashlib import md5 +from io import FileIO +from types import TracebackType from typing import ( Any, Callable, @@ -44,21 +46,21 @@ List, Optional, Tuple, + Type, Union, cast, - Type ) from ._page import PageObject, _VirtualList from ._reader import PdfReader from ._security import _alg33, _alg34, _alg35 from ._utils import ( + StrByteType, StreamType, _get_max_pdf_version_header, b_, deprecate_bookmark, deprecate_with_replacement, - StrByteType logger_warning, ) from .constants import AnnotationDictionaryAttributes @@ -109,9 +111,6 @@ ZoomArgsType, ZoomArgType, ) -from io import FileIO -from types import TracebackType - logger = logging.getLogger(__name__) @@ -172,8 +171,12 @@ def __enter__(self) -> "PdfWriter": return self # Write to the fileobj. - def __exit__(self, exc_type: Optional[Type[BaseException]], exc: Optional[BaseException], - traceback: Optional[TracebackType]) -> None: + def __exit__( + self, + exc_type: Optional[Type[BaseException]], + exc: Optional[BaseException], + traceback: Optional[TracebackType], + ) -> None: if self.fileobj: self.write(self.fileobj) From fa8880fad222ac76e4ee1c168382634b41091f71 Mon Sep 17 00:00:00 2001 From: Martin Thoma Date: Mon, 1 Aug 2022 18:57:25 +0200 Subject: [PATCH 31/41] os.path.join --- tests/test_merger.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/tests/test_merger.py b/tests/test_merger.py index ecc273443..c26aec2ca 100644 --- a/tests/test_merger.py +++ b/tests/test_merger.py @@ -19,10 +19,10 @@ def merger_operate(merger): - pdf_path = os.path.join(RESOURCE_ROOT, "crazyones.pdf") - outline = os.path.join(RESOURCE_ROOT, "pdflatex-outline.pdf") - pdf_forms = os.path.join(RESOURCE_ROOT, "pdflatex-forms.pdf") - pdf_pw = os.path.join(RESOURCE_ROOT, "libreoffice-writer-password.pdf") + pdf_path = RESOURCE_ROOT / "crazyones.pdf" + outline = RESOURCE_ROOT / "pdflatex-outline.pdf" + pdf_forms = RESOURCE_ROOT / "pdflatex-forms.pdf" + pdf_pw = RESOURCE_ROOT / "libreoffice-writer-password.pdf" # string path: merger.append(pdf_path) @@ -132,6 +132,7 @@ def test_merger_operations_by_semi_traditional_usage(): merger_operate(merger) merger.write(tmp_path) + assert os.path.isfile(tmp_path) # cleanup os.remove(tmp_path) From 626d064bbf01005f4981547b234d752c9ddbe9e1 Mon Sep 17 00:00:00 2001 From: Martin Thoma Date: Mon, 1 Aug 2022 21:55:36 +0200 Subject: [PATCH 32/41] Fix issues --- PyPDF2/_merger.py | 2 ++ tests/test_merger.py | 20 +++++++++++++++++--- 2 files changed, 19 insertions(+), 3 deletions(-) diff --git a/PyPDF2/_merger.py b/PyPDF2/_merger.py index efacbaeb4..642f04b01 100644 --- a/PyPDF2/_merger.py +++ b/PyPDF2/_merger.py @@ -122,7 +122,9 @@ def __exit__( ) -> None: """Write to the fileobj and close the merger.""" if self.fileobj: + print(f"write to {self.fileobj}") self.write(self.fileobj) + print("nope") self.close() @deprecate_bookmark(bookmark="outline_item", import_bookmarks="import_outline") diff --git a/tests/test_merger.py b/tests/test_merger.py index c26aec2ca..63b689e07 100644 --- a/tests/test_merger.py +++ b/tests/test_merger.py @@ -93,6 +93,8 @@ def merger_operate(merger): merger.set_page_layout("/SinglePage") merger.set_page_mode("/UseThumbs") + +def check_outline(tmp_path): # Check if outline is correct reader = PyPDF2.PdfReader(tmp_path) assert [el.title for el in reader.outline if isinstance(el, Destination)] == [ @@ -116,32 +118,44 @@ def merger_operate(merger): def test_merger_operations_by_totally_traditional_usage(): + # Arrange merger = PdfMerger() - merger_operate(merger) + # Act merger.write(tmp_path) merger.close() + # Assert + check_outline(tmp_path) + # cleanup os.remove(tmp_path) def test_merger_operations_by_semi_traditional_usage(): with PdfMerger() as merger: + # Arrange merger_operate(merger) - + # Act merger.write(tmp_path) + + # Assert assert os.path.isfile(tmp_path) + check_outline(tmp_path) # cleanup os.remove(tmp_path) def test_merger_operation_by_totally_new_usage(): - with PdfMerger(tmp_path) as merger: + with PdfMerger(fileobj=tmp_path) as merger: merger_operate(merger) + # Assert + assert os.path.isfile(tmp_path) + check_outline(tmp_path) + # cleanup os.remove(tmp_path) From 6af6969a4aedd7385be602151ea120e8b431da2b Mon Sep 17 00:00:00 2001 From: Martin Thoma Date: Mon, 1 Aug 2022 22:03:55 +0200 Subject: [PATCH 33/41] Add type hint --- PyPDF2/_writer.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/PyPDF2/_writer.py b/PyPDF2/_writer.py index fbcdb75ee..cd2e4007e 100644 --- a/PyPDF2/_writer.py +++ b/PyPDF2/_writer.py @@ -36,7 +36,7 @@ import time import uuid from hashlib import md5 -from io import FileIO +from io import BufferedReader, BufferedWriter, BytesIO, FileIO from types import TracebackType from typing import ( Any, @@ -818,7 +818,9 @@ def write_stream(self, stream: StreamType) -> None: self._write_trailer(stream) stream.write(b_(f"\nstartxref\n{xref_location}\n%%EOF\n")) # eof - def write(self, stream: StrByteType) -> None: + def write( + self, stream: StrByteType + ) -> Tuple[bool, Union[FileIO, BytesIO, BufferedReader, BufferedWriter]]: """ Write the collection of pages added to this object out as a PDF file. From 73c2ffb4fdcff49f7a512ee594a38ec3f7b8b07f Mon Sep 17 00:00:00 2001 From: Martin Thoma Date: Mon, 1 Aug 2022 22:21:27 +0200 Subject: [PATCH 34/41] Test one more line --- tests/test_merger.py | 4 ++-- tests/test_writer.py | 10 ++++++++++ 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/tests/test_merger.py b/tests/test_merger.py index 63b689e07..9480fdeec 100644 --- a/tests/test_merger.py +++ b/tests/test_merger.py @@ -117,7 +117,7 @@ def check_outline(tmp_path): tmp_path = "dont_commit_merged.pdf" -def test_merger_operations_by_totally_traditional_usage(): +def test_merger_operations_by_traditional_usage(): # Arrange merger = PdfMerger() merger_operate(merger) @@ -148,7 +148,7 @@ def test_merger_operations_by_semi_traditional_usage(): os.remove(tmp_path) -def test_merger_operation_by_totally_new_usage(): +def test_merger_operation_by_new_usage(): with PdfMerger(fileobj=tmp_path) as merger: merger_operate(merger) diff --git a/tests/test_writer.py b/tests/test_writer.py index 272234ff0..668fd9eff 100644 --- a/tests/test_writer.py +++ b/tests/test_writer.py @@ -692,3 +692,13 @@ def test_colors_in_outline_item(): # Cleanup os.remove(target) # remove for testing + + +def test_write_empty_stream(): + reader = PdfReader(EXTERNAL_ROOT / "004-pdflatex-4-pages/pdflatex-4-pages.pdf") + writer = PdfWriter() + writer.clone_document_from_reader(reader) + + with pytest.raises(ValueError) as exc: + writer.write("") + assert exc.value.args[0] == "Output(stream=) is empty." From 5e6f66331cfdac366a1cd048e157e6efe23bc95e Mon Sep 17 00:00:00 2001 From: Martin Thoma Date: Mon, 1 Aug 2022 22:33:07 +0200 Subject: [PATCH 35/41] Update comments --- PyPDF2/_merger.py | 2 +- PyPDF2/_writer.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/PyPDF2/_merger.py b/PyPDF2/_merger.py index 642f04b01..930ca9827 100644 --- a/PyPDF2/_merger.py +++ b/PyPDF2/_merger.py @@ -110,8 +110,8 @@ def __init__(self, strict: bool = False, fileobj: StrByteType = "") -> None: self.fileobj = fileobj self.strict = strict - # There is nothing to do. def __enter__(self) -> "PdfMerger": + # There is nothing to do. return self def __exit__( diff --git a/PyPDF2/_writer.py b/PyPDF2/_writer.py index cd2e4007e..e5a422084 100644 --- a/PyPDF2/_writer.py +++ b/PyPDF2/_writer.py @@ -165,18 +165,18 @@ def __init__(self, fileobj: StrByteType = "") -> None: self.fileobj = fileobj self.with_as_usage = False - # Let it know whether it is initialized by with ... as ... usage or not def __enter__(self) -> "PdfWriter": + """Store that writer is initialized by 'with'.""" self.with_as_usage = True return self - # Write to the fileobj. def __exit__( self, exc_type: Optional[Type[BaseException]], exc: Optional[BaseException], traceback: Optional[TracebackType], ) -> None: + """Write data to the fileobj.""" if self.fileobj: self.write(self.fileobj) From 8ffda32dd2456f931d24274bb5c5f7b04a458bc6 Mon Sep 17 00:00:00 2001 From: Martin Thoma Date: Mon, 1 Aug 2022 22:35:00 +0200 Subject: [PATCH 36/41] Add JianzhengLuo to contributors list --- CONTRIBUTORS.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CONTRIBUTORS.md b/CONTRIBUTORS.md index 28d3d9b8d..23f1ee1a7 100644 --- a/CONTRIBUTORS.md +++ b/CONTRIBUTORS.md @@ -11,6 +11,7 @@ history and [GitHubs 'Contributors' feature](https://github.com/py-pdf/PyPDF2/gr ## Contributors to the pyPdf / PyPDF2 project +* [JianzhengLuo](https://github.com/JianzhengLuo) * [Karvonen, Harry](https://github.com/Hatell/) * [KourFrost](https://github.com/KourFrost) * [Lightup1](https://github.com/Lightup1) From 288835352726fb2d35c267d04932bd2ecd4c95af Mon Sep 17 00:00:00 2001 From: Martin Thoma Date: Tue, 2 Aug 2022 07:36:50 +0200 Subject: [PATCH 37/41] Allow using Path --- PyPDF2/_merger.py | 6 ++++-- PyPDF2/_writer.py | 5 +++-- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/PyPDF2/_merger.py b/PyPDF2/_merger.py index 930ca9827..d69b548f8 100644 --- a/PyPDF2/_merger.py +++ b/PyPDF2/_merger.py @@ -100,7 +100,9 @@ class PdfMerger: """ @deprecate_bookmark(bookmarks="outline") - def __init__(self, strict: bool = False, fileobj: StrByteType = "") -> None: + def __init__( + self, strict: bool = False, fileobj: Union[Path, StrByteType] = "" + ) -> None: self.inputs: List[Tuple[Any, PdfReader, bool]] = [] self.pages: List[Any] = [] self.output: Optional[PdfWriter] = PdfWriter() @@ -285,7 +287,7 @@ def append( """ self.merge(len(self.pages), fileobj, outline_item, pages, import_outline) - def write(self, fileobj: StrByteType) -> None: + def write(self, fileobj: Union[Path, StrByteType]) -> None: """ Write all data that has been merged to the given output file. diff --git a/PyPDF2/_writer.py b/PyPDF2/_writer.py index e5a422084..d9448d1bf 100644 --- a/PyPDF2/_writer.py +++ b/PyPDF2/_writer.py @@ -37,6 +37,7 @@ import uuid from hashlib import md5 from io import BufferedReader, BufferedWriter, BytesIO, FileIO +from pathlib import Path from types import TracebackType from typing import ( Any, @@ -819,7 +820,7 @@ def write_stream(self, stream: StreamType) -> None: stream.write(b_(f"\nstartxref\n{xref_location}\n%%EOF\n")) # eof def write( - self, stream: StrByteType + self, stream: Union[Path, StrByteType] ) -> Tuple[bool, Union[FileIO, BytesIO, BufferedReader, BufferedWriter]]: """ Write the collection of pages added to this object out as a PDF file. @@ -834,7 +835,7 @@ def write( if stream == "": raise ValueError(f"Output(stream={stream}) is empty.") - if isinstance(stream, str): + if isinstance(stream, (str, Path)): stream = FileIO(stream, "wb") my_file = True From aa2f64aa1894873ddf70c0c6d5b11fde68b41cbb Mon Sep 17 00:00:00 2001 From: Martin Thoma Date: Tue, 2 Aug 2022 07:39:04 +0200 Subject: [PATCH 38/41] Remove print --- PyPDF2/_merger.py | 2 -- tests/test_generic.py | 1 - 2 files changed, 3 deletions(-) diff --git a/PyPDF2/_merger.py b/PyPDF2/_merger.py index d69b548f8..db974ce65 100644 --- a/PyPDF2/_merger.py +++ b/PyPDF2/_merger.py @@ -124,9 +124,7 @@ def __exit__( ) -> None: """Write to the fileobj and close the merger.""" if self.fileobj: - print(f"write to {self.fileobj}") self.write(self.fileobj) - print("nope") self.close() @deprecate_bookmark(bookmark="outline_item", import_bookmarks="import_outline") diff --git a/tests/test_generic.py b/tests/test_generic.py index 0ed2ee7d1..350ab8aef 100644 --- a/tests/test_generic.py +++ b/tests/test_generic.py @@ -351,7 +351,6 @@ class Tst: # to replace pdf if length in (6, 10): assert b"BT /F1" in do._StreamObject__data raise PdfReadError("__ALLGOOD__") - print(exc.value) assert should_fail ^ (exc.value.args[0] == "__ALLGOOD__") From d731281c8d11e14464a421bab7459efc340ab2fd Mon Sep 17 00:00:00 2001 From: Martin Thoma Date: Tue, 2 Aug 2022 07:41:32 +0200 Subject: [PATCH 39/41] Apply suggestions from code review Co-authored-by: Matthew Peveler --- PyPDF2/_merger.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/PyPDF2/_merger.py b/PyPDF2/_merger.py index db974ce65..a63fe76cc 100644 --- a/PyPDF2/_merger.py +++ b/PyPDF2/_merger.py @@ -312,10 +312,10 @@ def write(self, fileobj: Union[Path, StrByteType]) -> None: self._write_outline() # Write the output to the file - my_file, fileobj = self.output.write(fileobj) + my_file, ret_fileobj = self.output.write(fileobj) if my_file: - fileobj.close() + ret_fileobj.close() def close(self) -> None: """Shut all file descriptors (input and output) and clear all memory usage.""" From 698821eb51065aa64c779e38ad77a50d4bea0de1 Mon Sep 17 00:00:00 2001 From: Martin Thoma Date: Tue, 2 Aug 2022 07:50:27 +0200 Subject: [PATCH 40/41] More tests --- tests/test_writer.py | 74 ++++++++++++++++++++++++++++++++++---------- 1 file changed, 58 insertions(+), 16 deletions(-) diff --git a/tests/test_writer.py b/tests/test_writer.py index 668fd9eff..667d6590b 100644 --- a/tests/test_writer.py +++ b/tests/test_writer.py @@ -97,49 +97,91 @@ def writer_operate(writer): tmp_path = "dont_commit_writer.pdf" -def test_writer_operations_by_totally_traditional_usage(): +@pytest.mark.parametrize( + ("write_data_here", "needs_cleanup"), + [ + ("dont_commit_writer.pdf", True), + (Path("dont_commit_writer.pdf"), True), + (BytesIO(), False), + ], +) +def test_writer_operations_by_traditional_usage(write_data_here, needs_cleanup): writer = PdfWriter() writer_operate(writer) # finally, write "output" to PyPDF2-output.pdf - with open(tmp_path, "wb") as output_stream: + if needs_cleanup: + with open(write_data_here, "wb") as output_stream: + writer.write(output_stream) + else: + output_stream = write_data_here writer.write(output_stream) - # cleanup - os.remove(tmp_path) + if needs_cleanup: + os.remove(write_data_here) -def test_writer_operations_by_semi_traditional_usage(): +@pytest.mark.parametrize( + ("write_data_here", "needs_cleanup"), + [ + ("dont_commit_writer.pdf", True), + (Path("dont_commit_writer.pdf"), True), + (BytesIO(), False), + ], +) +def test_writer_operations_by_semi_traditional_usage(write_data_here, needs_cleanup): with PdfWriter() as writer: writer_operate(writer) # finally, write "output" to PyPDF2-output.pdf - with open(tmp_path, "wb") as output_stream: + if needs_cleanup: + with open(write_data_here, "wb") as output_stream: + writer.write(output_stream) + else: + output_stream = write_data_here writer.write(output_stream) - # cleanup - os.remove(tmp_path) + if needs_cleanup: + os.remove(write_data_here) -def test_writer_operations_by_semi_new_traditional_usage(): +@pytest.mark.parametrize( + ("write_data_here", "needs_cleanup"), + [ + ("dont_commit_writer.pdf", True), + (Path("dont_commit_writer.pdf"), True), + (BytesIO(), False), + ], +) +def test_writer_operations_by_semi_new_traditional_usage( + write_data_here, needs_cleanup +): with PdfWriter() as writer: writer_operate(writer) # finally, write "output" to PyPDF2-output.pdf - writer.write(tmp_path) + writer.write(write_data_here) - # cleanup - os.remove(tmp_path) + if needs_cleanup: + os.remove(write_data_here) -def test_writer_operation_by_totally_new_usage(): +@pytest.mark.parametrize( + ("write_data_here", "needs_cleanup"), + [ + ("dont_commit_writer.pdf", True), + (Path("dont_commit_writer.pdf"), True), + (BytesIO(), False), + ], +) +def test_writer_operation_by_new_usage(write_data_here, needs_cleanup): # This includes write "output" to PyPDF2-output.pdf - with PdfWriter(tmp_path) as writer: + with PdfWriter(write_data_here) as writer: writer_operate(writer) - # cleanup - os.remove(tmp_path) + if needs_cleanup: + os.remove(write_data_here) @pytest.mark.parametrize( From 5464bd00bdaeed7b26e723f5e58d0d84a35d05b5 Mon Sep 17 00:00:00 2001 From: Martin Thoma Date: Tue, 2 Aug 2022 13:14:09 +0200 Subject: [PATCH 41/41] Use tmp_path fixture --- tests/test_merger.py | 37 +++++++++++++++---------------------- 1 file changed, 15 insertions(+), 22 deletions(-) diff --git a/tests/test_merger.py b/tests/test_merger.py index 9480fdeec..7411a7a9d 100644 --- a/tests/test_merger.py +++ b/tests/test_merger.py @@ -114,50 +114,43 @@ def check_outline(tmp_path): # TODO: There seem to be no destinations for those links? -tmp_path = "dont_commit_merged.pdf" +tmp_filename = "dont_commit_merged.pdf" -def test_merger_operations_by_traditional_usage(): +def test_merger_operations_by_traditional_usage(tmp_path): # Arrange merger = PdfMerger() merger_operate(merger) + path = tmp_path / tmp_filename # Act - merger.write(tmp_path) + merger.write(path) merger.close() # Assert - check_outline(tmp_path) + check_outline(path) - # cleanup - os.remove(tmp_path) +def test_merger_operations_by_semi_traditional_usage(tmp_path): + path = tmp_path / tmp_filename -def test_merger_operations_by_semi_traditional_usage(): with PdfMerger() as merger: - # Arrange merger_operate(merger) - # Act - merger.write(tmp_path) + merger.write(path) # Act # Assert - assert os.path.isfile(tmp_path) - check_outline(tmp_path) - - # cleanup - os.remove(tmp_path) + assert os.path.isfile(path) + check_outline(path) -def test_merger_operation_by_new_usage(): - with PdfMerger(fileobj=tmp_path) as merger: +def test_merger_operation_by_new_usage(tmp_path): + path = tmp_path / tmp_filename + with PdfMerger(fileobj=path) as merger: merger_operate(merger) # Assert - assert os.path.isfile(tmp_path) - check_outline(tmp_path) - - # cleanup - os.remove(tmp_path) + assert os.path.isfile(path) + check_outline(path) def test_merge_page_exception():