From a2b7b55ad5472d080018306e173e6da3037a2445 Mon Sep 17 00:00:00 2001 From: JianzhengLuo Date: Fri, 15 Jul 2022 20:53:22 +0800 Subject: [PATCH 01/29] Add `with ... as ...` usage (#1108) --- PyPDF2/_merger.py | 21 +++++++++++++-------- PyPDF2/_writer.py | 30 ++++++++++++++++++++++++++++-- 2 files changed, 41 insertions(+), 10 deletions(-) diff --git a/PyPDF2/_merger.py b/PyPDF2/_merger.py index 319a47ccd..ccd01ab9a 100644 --- a/PyPDF2/_merger.py +++ b/PyPDF2/_merger.py @@ -78,17 +78,29 @@ class PdfMerger: :param bool strict: Determines whether user should be warned of all problems and also causes some correctable problems to be fatal. Defaults to ``False``. + :param fileobj: Output file. Can be a filename or any kind of + file-like object. """ - def __init__(self, strict: bool = False) -> None: + def __init__(self, fileobj: StrByteType = "", strict: bool = False) -> None: self.inputs: List[Tuple[Any, PdfReader, bool]] = [] self.pages: List[Any] = [] self.output: Optional[PdfWriter] = PdfWriter() self.bookmarks: OutlinesType = [] self.named_dests: List[Any] = [] self.id_count = 0 + self.fileobj = fileobj self.strict = strict + # There is nothing to do. + def __enter__(self): + return self + + # Write to the fileobj and close the merger. + def __exit__(self, *args): + self.write(self.fileobj) + self.close() + def merge( self, position: int, @@ -252,10 +264,6 @@ def write(self, fileobj: StrByteType) -> None: """ if self.output is None: raise RuntimeError(ERR_CLOSED_WRITER) - my_file = False - if isinstance(fileobj, str): - fileobj = FileIO(fileobj, "wb") - my_file = True # Add pages to the PdfWriter # The commented out line below was replaced with the two lines below it @@ -276,9 +284,6 @@ def write(self, fileobj: StrByteType) -> None: # Write the output to the file self.output.write(fileobj) - if my_file: - fileobj.close() - def close(self) -> None: """Shut all file descriptors (input and output) and clear all memory usage.""" self.pages = [] diff --git a/PyPDF2/_writer.py b/PyPDF2/_writer.py index 843549ab0..96dde4c4c 100644 --- a/PyPDF2/_writer.py +++ b/PyPDF2/_writer.py @@ -59,6 +59,7 @@ _get_max_pdf_version_header, b_, deprecate_with_replacement, + StrByteType ) from .constants import AnnotationDictionaryAttributes from .constants import CatalogAttributes as CA @@ -107,6 +108,8 @@ ZoomArgsType, ZoomArgType, ) +from io import FileIO + logger = logging.getLogger(__name__) @@ -121,7 +124,7 @@ class PdfWriter: class (typically :class:`PdfReader`). """ - def __init__(self) -> None: + def __init__(self, fileobj: StrByteType = "") -> None: self._header = b"%PDF-1.3" self._objects: List[Optional[PdfObject]] = [] # array of indirect objects self._idnum_hash: Dict[bytes, IndirectObject] = {} @@ -158,6 +161,19 @@ def __init__(self) -> None: ) self._root: Optional[IndirectObject] = None self._root_object = root + self.fileobj = fileobj + self.my_file = False + + # Nothing to do. + def __enter__(self): + return self + + # Write to the fileobj. + def __exit__(self, *args): + self.write(self.fileobj) + + if self.my_file: + self.fileobj.close() @property def pdf_header(self) -> bytes: @@ -763,7 +779,7 @@ def encrypt( self._encrypt = self._add_object(encrypt) self._encrypt_key = key - def write(self, stream: StreamType) -> None: + def write_stream(self, stream: StreamType) -> None: """ Write the collection of pages added to this object out as a PDF file. @@ -794,6 +810,16 @@ def write(self, stream: StreamType) -> None: self._write_trailer(stream) stream.write(b_(f"\nstartxref\n{xref_location}\n%%EOF\n")) # eof + def write(self, fileobj): + + if isinstance(fileobj, str) and fileobj: + self.fileobj = FileIO(fileobj, "wb") + self.my_file = True + else: + self.fileobj = fileobj + + self.write_stream(self.fileobj) + def _write_header(self, stream: StreamType) -> List[int]: object_positions = [] stream.write(self.pdf_header + b"\n") From 2a88bd52786380df5c1091a54b056f49f05075ce Mon Sep 17 00:00:00 2001 From: Jianzheng Luo Date: Sat, 16 Jul 2022 07:51:12 +0800 Subject: [PATCH 02/29] Update PyPDF2/_merger.py according to @MasterOdin's suggestions Co-authored-by: Matthew Peveler --- PyPDF2/_merger.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/PyPDF2/_merger.py b/PyPDF2/_merger.py index ccd01ab9a..0ee0e49bd 100644 --- a/PyPDF2/_merger.py +++ b/PyPDF2/_merger.py @@ -93,7 +93,7 @@ def __init__(self, fileobj: StrByteType = "", strict: bool = False) -> None: self.strict = strict # There is nothing to do. - def __enter__(self): + def __enter__(self) -> "PdfMerger": return self # Write to the fileobj and close the merger. From b6d0351a4ae2ea9cf48e0d5f196a783eaa698558 Mon Sep 17 00:00:00 2001 From: JianzhengLuo Date: Sat, 16 Jul 2022 07:59:16 +0800 Subject: [PATCH 03/29] Update PyPDF2/_merger.py according to @MasterOdin's suggestions --- PyPDF2/_merger.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/PyPDF2/_merger.py b/PyPDF2/_merger.py index 0ee0e49bd..e1cb75f69 100644 --- a/PyPDF2/_merger.py +++ b/PyPDF2/_merger.py @@ -75,11 +75,11 @@ class PdfMerger: See the functions :meth:`merge()` (or :meth:`append()`) and :meth:`write()` for usage information. + :param fileobj: Output file. Can be a filename or any kind of + file-like object. :param bool strict: Determines whether user should be warned of all problems and also causes some correctable problems to be fatal. Defaults to ``False``. - :param fileobj: Output file. Can be a filename or any kind of - file-like object. """ def __init__(self, fileobj: StrByteType = "", strict: bool = False) -> None: From 80813aa910025c5de8954020af11e9248f7ac298 Mon Sep 17 00:00:00 2001 From: JianzhengLuo Date: Sat, 16 Jul 2022 08:04:02 +0800 Subject: [PATCH 04/29] Update PyPDF2/_merger.py according to @MasterOdin's suggestions --- PyPDF2/_merger.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/PyPDF2/_merger.py b/PyPDF2/_merger.py index e1cb75f69..476a35cce 100644 --- a/PyPDF2/_merger.py +++ b/PyPDF2/_merger.py @@ -26,7 +26,7 @@ # POSSIBILITY OF SUCH DAMAGE. from io import BytesIO, FileIO, IOBase -from typing import Any, Dict, Iterable, List, Optional, Tuple, Union, cast +from typing import Any, Dict, Iterable, List, Optional, Tuple, Union, cast, Type, TracebackType from ._encryption import Encryption from ._page import PageObject @@ -97,7 +97,8 @@ def __enter__(self) -> "PdfMerger": return self # Write to the fileobj and close the merger. - def __exit__(self, *args): + def __exit__(self, exc_type: Optional[Type[BaseException]], exc: Optional[BaseException], traceback: + Optional[TracebackType]): self.write(self.fileobj) self.close() From e6ec1f6c847b7fdfab18e0c28a9c27ac5efa894e Mon Sep 17 00:00:00 2001 From: JianzhengLuo Date: Sat, 16 Jul 2022 08:15:09 +0800 Subject: [PATCH 05/29] Update PyPDF2/_merger.py according to @MasterOdin's suggestions --- PyPDF2/_merger.py | 4 ++-- PyPDF2/_writer.py | 5 ++++- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/PyPDF2/_merger.py b/PyPDF2/_merger.py index 476a35cce..43f22fd75 100644 --- a/PyPDF2/_merger.py +++ b/PyPDF2/_merger.py @@ -97,8 +97,8 @@ def __enter__(self) -> "PdfMerger": return self # Write to the fileobj and close the merger. - def __exit__(self, exc_type: Optional[Type[BaseException]], exc: Optional[BaseException], traceback: - Optional[TracebackType]): + def __exit__(self, exc_type: Optional[Type[BaseException]], exc: Optional[BaseException], + traceback: Optional[TracebackType]): self.write(self.fileobj) self.close() diff --git a/PyPDF2/_writer.py b/PyPDF2/_writer.py index 96dde4c4c..88d416a6b 100644 --- a/PyPDF2/_writer.py +++ b/PyPDF2/_writer.py @@ -47,6 +47,8 @@ Tuple, Union, cast, + Type, + TracebackType ) from PyPDF2.errors import PdfReadWarning @@ -169,7 +171,8 @@ def __enter__(self): return self # Write to the fileobj. - def __exit__(self, *args): + def __exit__(self, exc_type: Optional[Type[BaseException]], exc: Optional[BaseException], + traceback: Optional[TracebackType]): self.write(self.fileobj) if self.my_file: From 386be5b398dce6cf10bc99534f2271ae325fa2c7 Mon Sep 17 00:00:00 2001 From: JianzhengLuo Date: Sat, 16 Jul 2022 08:17:23 +0800 Subject: [PATCH 06/29] Update PyPDF2/_merger.py according to @MasterOdin's suggestions --- PyPDF2/_writer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/PyPDF2/_writer.py b/PyPDF2/_writer.py index 88d416a6b..43a14d131 100644 --- a/PyPDF2/_writer.py +++ b/PyPDF2/_writer.py @@ -813,7 +813,7 @@ def write_stream(self, stream: StreamType) -> None: self._write_trailer(stream) stream.write(b_(f"\nstartxref\n{xref_location}\n%%EOF\n")) # eof - def write(self, fileobj): + def write(self, fileobj: StrByteType) -> None: if isinstance(fileobj, str) and fileobj: self.fileobj = FileIO(fileobj, "wb") From 31786bba18190fbfe983b2410f430a68f783c19e Mon Sep 17 00:00:00 2001 From: JianzhengLuo Date: Sat, 16 Jul 2022 08:40:44 +0800 Subject: [PATCH 07/29] Sorry, I forgot to run before committing, so didn't notice that `TracebackType` is from `typing` but from `types`. --- PyPDF2/_merger.py | 3 ++- PyPDF2/_writer.py | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/PyPDF2/_merger.py b/PyPDF2/_merger.py index 43f22fd75..21b0cbab1 100644 --- a/PyPDF2/_merger.py +++ b/PyPDF2/_merger.py @@ -26,7 +26,7 @@ # POSSIBILITY OF SUCH DAMAGE. from io import BytesIO, FileIO, IOBase -from typing import Any, Dict, Iterable, List, Optional, Tuple, Union, cast, Type, TracebackType +from typing import Any, Dict, Iterable, List, Optional, Tuple, Union, cast, Type from ._encryption import Encryption from ._page import PageObject @@ -51,6 +51,7 @@ ) from .pagerange import PageRange, PageRangeSpec from .types import FitType, LayoutType, OutlinesType, PagemodeType, ZoomArgType +from types import TracebackType ERR_CLOSED_WRITER = "close() was called and thus the writer cannot be used anymore" diff --git a/PyPDF2/_writer.py b/PyPDF2/_writer.py index 43a14d131..cc48ff2f2 100644 --- a/PyPDF2/_writer.py +++ b/PyPDF2/_writer.py @@ -47,8 +47,7 @@ Tuple, Union, cast, - Type, - TracebackType + Type ) from PyPDF2.errors import PdfReadWarning @@ -111,6 +110,7 @@ ZoomArgType, ) from io import FileIO +from types import TracebackType logger = logging.getLogger(__name__) From ae1bec010b9ff8a1300c243a061e177abae584e7 Mon Sep 17 00:00:00 2001 From: JianzhengLuo Date: Sat, 16 Jul 2022 09:28:55 +0800 Subject: [PATCH 08/29] Modify the wrong closing place that cause. --- PyPDF2/_writer.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/PyPDF2/_writer.py b/PyPDF2/_writer.py index cc48ff2f2..355132dde 100644 --- a/PyPDF2/_writer.py +++ b/PyPDF2/_writer.py @@ -164,7 +164,6 @@ def __init__(self, fileobj: StrByteType = "") -> None: self._root: Optional[IndirectObject] = None self._root_object = root self.fileobj = fileobj - self.my_file = False # Nothing to do. def __enter__(self): @@ -175,9 +174,6 @@ def __exit__(self, exc_type: Optional[Type[BaseException]], exc: Optional[BaseEx traceback: Optional[TracebackType]): self.write(self.fileobj) - if self.my_file: - self.fileobj.close() - @property def pdf_header(self) -> bytes: """ @@ -814,14 +810,18 @@ def write_stream(self, stream: StreamType) -> None: stream.write(b_(f"\nstartxref\n{xref_location}\n%%EOF\n")) # eof def write(self, fileobj: StrByteType) -> None: + my_file = False if isinstance(fileobj, str) and fileobj: - self.fileobj = FileIO(fileobj, "wb") - self.my_file = True + fileobj = FileIO(fileobj, "wb") + my_file = True else: - self.fileobj = fileobj + ... + + self.write_stream(fileobj) - self.write_stream(self.fileobj) + if my_file: + fileobj.close() def _write_header(self, stream: StreamType) -> List[int]: object_positions = [] From b695113ea65b2ed12d185cc0c4343411118065f9 Mon Sep 17 00:00:00 2001 From: Jianzheng Luo Date: Mon, 18 Jul 2022 08:30:38 +0800 Subject: [PATCH 09/29] Modify PyPDF2/_writer.py according to @MasterOdin's suggestions Co-authored-by: Matthew Peveler --- PyPDF2/_writer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/PyPDF2/_writer.py b/PyPDF2/_writer.py index 355132dde..55d3fe0f1 100644 --- a/PyPDF2/_writer.py +++ b/PyPDF2/_writer.py @@ -816,7 +816,7 @@ def write(self, fileobj: StrByteType) -> None: fileobj = FileIO(fileobj, "wb") my_file = True else: - ... + pass self.write_stream(fileobj) From 58797c2f6a4ce8d6a7984bb133a6fcd6a3450590 Mon Sep 17 00:00:00 2001 From: Jianzheng Luo Date: Mon, 18 Jul 2022 09:06:03 +0800 Subject: [PATCH 10/29] Modify PyPDF2/_writer.py according to @MasterOdin's suggestions Co-authored-by: Matthew Peveler --- PyPDF2/_writer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/PyPDF2/_writer.py b/PyPDF2/_writer.py index 55d3fe0f1..6ff7a56f1 100644 --- a/PyPDF2/_writer.py +++ b/PyPDF2/_writer.py @@ -166,7 +166,7 @@ def __init__(self, fileobj: StrByteType = "") -> None: self.fileobj = fileobj # Nothing to do. - def __enter__(self): + def __enter__(self) -> "PdfWriter": return self # Write to the fileobj. From 1caa9ecb9389ac02186aa3af6f8c219e9c57ce41 Mon Sep 17 00:00:00 2001 From: Jianzheng Luo Date: Mon, 18 Jul 2022 09:06:26 +0800 Subject: [PATCH 11/29] Modify PyPDF2/_writer.py according to @MasterOdin's suggestions Co-authored-by: Matthew Peveler --- PyPDF2/_writer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/PyPDF2/_writer.py b/PyPDF2/_writer.py index 6ff7a56f1..a1312678f 100644 --- a/PyPDF2/_writer.py +++ b/PyPDF2/_writer.py @@ -171,7 +171,7 @@ def __enter__(self) -> "PdfWriter": # Write to the fileobj. def __exit__(self, exc_type: Optional[Type[BaseException]], exc: Optional[BaseException], - traceback: Optional[TracebackType]): + traceback: Optional[TracebackType]) -> None: self.write(self.fileobj) @property From 4fbe3cc679b573069d93fcc59b9becc65372f1d7 Mon Sep 17 00:00:00 2001 From: Jianzheng Luo Date: Mon, 18 Jul 2022 09:06:40 +0800 Subject: [PATCH 12/29] Modify PyPDF2/_writer.py according to @MasterOdin's suggestions Co-authored-by: Matthew Peveler --- PyPDF2/_merger.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/PyPDF2/_merger.py b/PyPDF2/_merger.py index 21b0cbab1..44f623ca7 100644 --- a/PyPDF2/_merger.py +++ b/PyPDF2/_merger.py @@ -99,7 +99,7 @@ def __enter__(self) -> "PdfMerger": # Write to the fileobj and close the merger. def __exit__(self, exc_type: Optional[Type[BaseException]], exc: Optional[BaseException], - traceback: Optional[TracebackType]): + traceback: Optional[TracebackType]) -> None: self.write(self.fileobj) self.close() From d598e8ab0de9beeaed1c7043095e8fba7aeb0685 Mon Sep 17 00:00:00 2001 From: JianzhengLuo Date: Mon, 18 Jul 2022 09:11:18 +0800 Subject: [PATCH 13/29] Modify PyPDF2/_writer.py according to @MasterOdin's suggestions --- PyPDF2/_writer.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/PyPDF2/_writer.py b/PyPDF2/_writer.py index 55d3fe0f1..76b5fcd75 100644 --- a/PyPDF2/_writer.py +++ b/PyPDF2/_writer.py @@ -812,7 +812,10 @@ def write_stream(self, stream: StreamType) -> None: def write(self, fileobj: StrByteType) -> None: my_file = False - if isinstance(fileobj, str) and fileobj: + if fileobj == "": + raise ValueError(f"Output(fileobj={fileobj}) is empty.") + + if isinstance(fileobj, str): fileobj = FileIO(fileobj, "wb") my_file = True else: @@ -1630,7 +1633,7 @@ def _get_page_layout(self) -> Optional[LayoutType]: def getPageLayout(self) -> Optional[LayoutType]: # pragma: no cover """ - .. deprecated:: 1.28.0 + .. deprecated:: 1.28.0raise Use :py:attr:`page_layout` instead. """ From 519dad1246203d0bcf2fda7d1c5a57b37b9c6019 Mon Sep 17 00:00:00 2001 From: JianzhengLuo Date: Thu, 21 Jul 2022 09:26:10 +0800 Subject: [PATCH 14/29] Fix accident --- PyPDF2/_writer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/PyPDF2/_writer.py b/PyPDF2/_writer.py index 85ecae30e..d223027d1 100644 --- a/PyPDF2/_writer.py +++ b/PyPDF2/_writer.py @@ -1633,7 +1633,7 @@ def _get_page_layout(self) -> Optional[LayoutType]: def getPageLayout(self) -> Optional[LayoutType]: # pragma: no cover """ - .. deprecated:: 1.28.0raise + .. deprecated:: 1.28.0 Use :py:attr:`page_layout` instead. """ From 90af68a30c90c39a3a6104f5ba99d915e3691dff Mon Sep 17 00:00:00 2001 From: JianzhengLuo Date: Thu, 21 Jul 2022 11:06:33 +0800 Subject: [PATCH 15/29] Fix error raising while using half traditional usage --- PyPDF2/_merger.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/PyPDF2/_merger.py b/PyPDF2/_merger.py index 44f623ca7..4e2876395 100644 --- a/PyPDF2/_merger.py +++ b/PyPDF2/_merger.py @@ -100,7 +100,8 @@ def __enter__(self) -> "PdfMerger": # Write to the fileobj and close the merger. def __exit__(self, exc_type: Optional[Type[BaseException]], exc: Optional[BaseException], traceback: Optional[TracebackType]) -> None: - self.write(self.fileobj) + if self.fileobj: + self.write(self.fileobj) self.close() def merge( From 0f6765805a8f8433974b813c78d799ebcc057c4a Mon Sep 17 00:00:00 2001 From: JianzhengLuo Date: Thu, 21 Jul 2022 11:17:20 +0800 Subject: [PATCH 16/29] Add a unit test (Problems still exist, please help.) --- tests/test_merger.py | 40 ++++++++++++++++++++++++------ tests/test_writer.py | 58 +++++++++++++++++++++++++++++++++++--------- 2 files changed, 80 insertions(+), 18 deletions(-) diff --git a/tests/test_merger.py b/tests/test_merger.py index 2a9d9e4e6..3a7fd028c 100644 --- a/tests/test_merger.py +++ b/tests/test_merger.py @@ -17,13 +17,13 @@ sys.path.append(PROJECT_ROOT) -def test_merge(): +def merger_operate(merger): pdf_path = os.path.join(RESOURCE_ROOT, "crazyones.pdf") outline = os.path.join(RESOURCE_ROOT, "pdflatex-outline.pdf") pdf_forms = os.path.join(RESOURCE_ROOT, "pdflatex-forms.pdf") pdf_pw = os.path.join(RESOURCE_ROOT, "libreoffice-writer-password.pdf") - merger = PyPDF2.PdfMerger() + # merger = PyPDF2.PdfMerger() # string path: merger.append(pdf_path) @@ -78,10 +78,6 @@ def test_merge(): merger.set_page_layout("/SinglePage") merger.set_page_mode("/UseThumbs") - tmp_path = "dont_commit_merged.pdf" - merger.write(tmp_path) - merger.close() - # Check if bookmarks are correct reader = PyPDF2.PdfReader(tmp_path) assert [ @@ -102,7 +98,37 @@ def test_merge(): # TODO: There seem to be no destinations for those links? - # Clean up + +tmp_path = "dont_commit_merged.pdf" + + +def test_writer_operations_by_totally_traditional_usage(): + merger = PdfMerger() + + merger_operate(merger) + + merger.write(tmp_path) + merger.close() + + # cleanup + os.remove(tmp_path) + + +def test_writer_operations_by_semi_traditional_usage(): + with PdfMerger() as merger: + merger_operate(merger) + + merger.write(tmp_path) + + # cleanup + os.remove(tmp_path) + + +def test_writer_operation_by_totally_new_usage(): + with PdfMerger(tmp_path) as merger: + merger_operate(merger) + + # cleanup os.remove(tmp_path) diff --git a/tests/test_writer.py b/tests/test_writer.py index f2d3a56c6..c1b5ebdb0 100644 --- a/tests/test_writer.py +++ b/tests/test_writer.py @@ -24,12 +24,9 @@ def test_writer_clone(): assert len(writer.pages) == 4 -def test_writer_operations(): +def writer_operate(writer): """ - This test just checks if the operation throws an exception. - - This should be done way more thoroughly: It should be checked if the - output is as expected. + To test the writer that initialized by each of the four usages. """ pdf_path = os.path.join(RESOURCE_ROOT, "crazyones.pdf") pdf_outline_path = os.path.join(RESOURCE_ROOT, "pdflatex-outline.pdf") @@ -37,7 +34,6 @@ def test_writer_operations(): reader = PdfReader(pdf_path) reader_outline = PdfReader(pdf_outline_path) - writer = PdfWriter() page = reader.pages[0] with pytest.raises(PageSizeNotDefinedError) as exc: writer.add_blank_page() @@ -80,17 +76,57 @@ def test_writer_operations(): writer.add_attachment("foobar.gif", b"foobarcontent") - # finally, write "output" to PyPDF2-output.pdf - tmp_path = "dont_commit_writer.pdf" - with open(tmp_path, "wb") as output_stream: - writer.write(output_stream) - # Check that every key in _idnum_hash is correct objects_hash = [o.hash_value() for o in writer._objects] for k, v in writer._idnum_hash.items(): assert v.pdf == writer assert k in objects_hash, "Missing %s" % v + +tmp_path = "dont_commit_writer.pdf" + + +def test_writer_operations_by_totally_traditional_usage(): + writer = PdfWriter() + + writer_operate(writer) + + # finally, write "output" to PyPDF2-output.pdf + with open(tmp_path, "wb") as output_stream: + writer.write(output_stream) + + # cleanup + os.remove(tmp_path) + + +def test_writer_operations_by_semi_traditional_usage(): + with PdfWriter() as writer: + writer_operate(writer) + + # finally, write "output" to PyPDF2-output.pdf + with open(tmp_path, "wb") as output_stream: + writer.write(output_stream) + + # cleanup + os.remove(tmp_path) + + +def test_writer_operations_by_semi_new_traditional_usage(): + with PdfWriter() as writer: + writer_operate(writer) + + # finally, write "output" to PyPDF2-output.pdf + writer.write(tmp_path) + + # cleanup + os.remove(tmp_path) + + +def test_writer_operation_by_totally_new_usage(): + # This includes write "output" to PyPDF2-output.pdf + with PdfWriter(tmp_path) as writer: + writer_operate(writer) + # cleanup os.remove(tmp_path) From a6f973e20e224519520c62363cb330a4f85aee12 Mon Sep 17 00:00:00 2001 From: Jianzheng Luo Date: Thu, 21 Jul 2022 19:13:43 +0800 Subject: [PATCH 17/29] Update PyPDF2/_merger.py according to @MartinThoma's suggestions Co-authored-by: Martin Thoma --- PyPDF2/_merger.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/PyPDF2/_merger.py b/PyPDF2/_merger.py index 4e2876395..7db906695 100644 --- a/PyPDF2/_merger.py +++ b/PyPDF2/_merger.py @@ -97,9 +97,9 @@ def __init__(self, fileobj: StrByteType = "", strict: bool = False) -> None: def __enter__(self) -> "PdfMerger": return self - # Write to the fileobj and close the merger. def __exit__(self, exc_type: Optional[Type[BaseException]], exc: Optional[BaseException], traceback: Optional[TracebackType]) -> None: + """Write to the fileobj and close the merger.""" if self.fileobj: self.write(self.fileobj) self.close() From 580ad8c9fdcfbd7220764e436f829e13030b6d50 Mon Sep 17 00:00:00 2001 From: JianzhengLuo Date: Thu, 21 Jul 2022 19:17:34 +0800 Subject: [PATCH 18/29] Modify PyPDF2/_merger.py according to flake8 --- PyPDF2/_merger.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/PyPDF2/_merger.py b/PyPDF2/_merger.py index 7db906695..a48a99cc7 100644 --- a/PyPDF2/_merger.py +++ b/PyPDF2/_merger.py @@ -99,7 +99,7 @@ def __enter__(self) -> "PdfMerger": def __exit__(self, exc_type: Optional[Type[BaseException]], exc: Optional[BaseException], traceback: Optional[TracebackType]) -> None: - """Write to the fileobj and close the merger.""" + """Write to the fileobj and close the merger.""" if self.fileobj: self.write(self.fileobj) self.close() From bf264bca1c478f77c87f565739057df81ce13565 Mon Sep 17 00:00:00 2001 From: JianzhengLuo Date: Thu, 21 Jul 2022 19:32:25 +0800 Subject: [PATCH 19/29] Modify to compatible with the existing usage --- PyPDF2/_merger.py | 5 ++++- PyPDF2/_writer.py | 3 +-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/PyPDF2/_merger.py b/PyPDF2/_merger.py index a48a99cc7..f50a8c249 100644 --- a/PyPDF2/_merger.py +++ b/PyPDF2/_merger.py @@ -285,7 +285,10 @@ def write(self, fileobj: StrByteType) -> None: self._write_bookmarks() # Write the output to the file - self.output.write(fileobj) + my_file = self.output.write(fileobj) + + if my_file: + fileobj.close() def close(self) -> None: """Shut all file descriptors (input and output) and clear all memory usage.""" diff --git a/PyPDF2/_writer.py b/PyPDF2/_writer.py index d5a96b6de..347ab1498 100644 --- a/PyPDF2/_writer.py +++ b/PyPDF2/_writer.py @@ -823,8 +823,7 @@ def write(self, fileobj: StrByteType) -> None: self.write_stream(fileobj) - if my_file: - fileobj.close() + return my_file def _write_header(self, stream: StreamType) -> List[int]: object_positions = [] From c705300bf1d0f832403d7a78672abdd87aa40218 Mon Sep 17 00:00:00 2001 From: JianzhengLuo Date: Thu, 21 Jul 2022 19:48:57 +0800 Subject: [PATCH 20/29] Modify to compatible with the with .. as ... usage --- PyPDF2/_writer.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/PyPDF2/_writer.py b/PyPDF2/_writer.py index 347ab1498..a3be881de 100644 --- a/PyPDF2/_writer.py +++ b/PyPDF2/_writer.py @@ -164,9 +164,11 @@ def __init__(self, fileobj: StrByteType = "") -> None: self._root: Optional[IndirectObject] = None self._root_object = root self.fileobj = fileobj + self.with_as_usage = False - # Nothing to do. + # Let it know whether it is initialized by with ... as ... usage or not def __enter__(self) -> "PdfWriter": + self.with_as_usage = True return self # Write to the fileobj. @@ -823,6 +825,9 @@ def write(self, fileobj: StrByteType) -> None: self.write_stream(fileobj) + if self.with_as_usage: + fileobj.close() + return my_file def _write_header(self, stream: StreamType) -> List[int]: From 2940589924da978bf60bcca95de2c127d639cbc2 Mon Sep 17 00:00:00 2001 From: JianzhengLuo Date: Thu, 21 Jul 2022 19:56:11 +0800 Subject: [PATCH 21/29] Removed a meaningless annotation --- tests/test_merger.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/tests/test_merger.py b/tests/test_merger.py index 3a7fd028c..48b3a75ac 100644 --- a/tests/test_merger.py +++ b/tests/test_merger.py @@ -23,8 +23,6 @@ def merger_operate(merger): pdf_forms = os.path.join(RESOURCE_ROOT, "pdflatex-forms.pdf") pdf_pw = os.path.join(RESOURCE_ROOT, "libreoffice-writer-password.pdf") - # merger = PyPDF2.PdfMerger() - # string path: merger.append(pdf_path) merger.append(outline) From cda38ac04d6c66f59345dae1eb9dbcc32c48643a Mon Sep 17 00:00:00 2001 From: JianzhengLuo Date: Thu, 21 Jul 2022 19:57:39 +0800 Subject: [PATCH 22/29] Fixed wrong test function names --- tests/test_merger.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/test_merger.py b/tests/test_merger.py index 48b3a75ac..0807e903b 100644 --- a/tests/test_merger.py +++ b/tests/test_merger.py @@ -100,7 +100,7 @@ def merger_operate(merger): tmp_path = "dont_commit_merged.pdf" -def test_writer_operations_by_totally_traditional_usage(): +def test_merger_operations_by_totally_traditional_usage(): merger = PdfMerger() merger_operate(merger) @@ -112,7 +112,7 @@ def test_writer_operations_by_totally_traditional_usage(): os.remove(tmp_path) -def test_writer_operations_by_semi_traditional_usage(): +def test_merger_operations_by_semi_traditional_usage(): with PdfMerger() as merger: merger_operate(merger) @@ -122,7 +122,7 @@ def test_writer_operations_by_semi_traditional_usage(): os.remove(tmp_path) -def test_writer_operation_by_totally_new_usage(): +def test_merger_operation_by_totally_new_usage(): with PdfMerger(tmp_path) as merger: merger_operate(merger) From 0b181980a0f3fe842ec5443021104c0afcb56496 Mon Sep 17 00:00:00 2001 From: JianzhengLuo Date: Fri, 22 Jul 2022 09:36:46 +0800 Subject: [PATCH 23/29] Fixed those I can fix only. --- PyPDF2/_merger.py | 2 +- PyPDF2/_writer.py | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/PyPDF2/_merger.py b/PyPDF2/_merger.py index f50a8c249..0adccf514 100644 --- a/PyPDF2/_merger.py +++ b/PyPDF2/_merger.py @@ -285,7 +285,7 @@ def write(self, fileobj: StrByteType) -> None: self._write_bookmarks() # Write the output to the file - my_file = self.output.write(fileobj) + my_file, fileobj = self.output.write(fileobj) if my_file: fileobj.close() diff --git a/PyPDF2/_writer.py b/PyPDF2/_writer.py index a3be881de..d494b0d5f 100644 --- a/PyPDF2/_writer.py +++ b/PyPDF2/_writer.py @@ -174,7 +174,8 @@ def __enter__(self) -> "PdfWriter": # Write to the fileobj. def __exit__(self, exc_type: Optional[Type[BaseException]], exc: Optional[BaseException], traceback: Optional[TracebackType]) -> None: - self.write(self.fileobj) + if self.fileobj: + self.write(self.fileobj) @property def pdf_header(self) -> bytes: @@ -828,7 +829,7 @@ def write(self, fileobj: StrByteType) -> None: if self.with_as_usage: fileobj.close() - return my_file + return my_file, fileobj def _write_header(self, stream: StreamType) -> List[int]: object_positions = [] From 62f970f1d4df2fddfeb985d1a782230c6f7c0287 Mon Sep 17 00:00:00 2001 From: Jianzheng Luo Date: Fri, 22 Jul 2022 19:29:41 +0800 Subject: [PATCH 24/29] Removed useless `else` section Co-authored-by: Martin Thoma --- PyPDF2/_writer.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/PyPDF2/_writer.py b/PyPDF2/_writer.py index d494b0d5f..c62e6c5d0 100644 --- a/PyPDF2/_writer.py +++ b/PyPDF2/_writer.py @@ -821,8 +821,6 @@ def write(self, fileobj: StrByteType) -> None: if isinstance(fileobj, str): fileobj = FileIO(fileobj, "wb") my_file = True - else: - pass self.write_stream(fileobj) From d85c91b540efbd375507ca9b8fbb777b6bbc0921 Mon Sep 17 00:00:00 2001 From: Jianzheng Luo Date: Fri, 22 Jul 2022 19:34:45 +0800 Subject: [PATCH 25/29] Renamed argument name `fileobj` back to `stream` to keep the existing workflow Co-authored-by: Martin Thoma --- PyPDF2/_writer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/PyPDF2/_writer.py b/PyPDF2/_writer.py index c62e6c5d0..36da8360c 100644 --- a/PyPDF2/_writer.py +++ b/PyPDF2/_writer.py @@ -812,7 +812,7 @@ def write_stream(self, stream: StreamType) -> None: self._write_trailer(stream) stream.write(b_(f"\nstartxref\n{xref_location}\n%%EOF\n")) # eof - def write(self, fileobj: StrByteType) -> None: + def write(self, stream: StrByteType) -> None: my_file = False if fileobj == "": From 5cd3f3e670071ac3a7d2d49698bc605dda1def79 Mon Sep 17 00:00:00 2001 From: JianzhengLuo Date: Fri, 22 Jul 2022 19:55:06 +0800 Subject: [PATCH 26/29] Modified annoation for `PdfWriter().write()` and `PdfWriter().write_stream()` --- PyPDF2/_writer.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/PyPDF2/_writer.py b/PyPDF2/_writer.py index 36da8360c..1782fe810 100644 --- a/PyPDF2/_writer.py +++ b/PyPDF2/_writer.py @@ -782,12 +782,6 @@ def encrypt( self._encrypt_key = key def write_stream(self, stream: StreamType) -> None: - """ - Write the collection of pages added to this object out as a PDF file. - - :param stream: An object to write the file to. The object must support - the write method and the tell method, similar to a file object. - """ if hasattr(stream, "mode") and "b" not in stream.mode: warnings.warn( f"File <{stream.name}> to write to is not in binary mode. " # type: ignore @@ -813,6 +807,14 @@ def write_stream(self, stream: StreamType) -> None: stream.write(b_(f"\nstartxref\n{xref_location}\n%%EOF\n")) # eof def write(self, stream: StrByteType) -> None: + """ + Write the collection of pages added to this object out as a PDF file. + + :param stream: An object to write the file to. The object can support + the write method and the tell method, similar to a file object, or + be a file path, just like the fileobj, just named it stream to keep + existing workflow. + """ my_file = False if fileobj == "": From a7346ef13ee2514107d6cab26b52a224d0036d0b Mon Sep 17 00:00:00 2001 From: Jianzheng Luo Date: Sat, 23 Jul 2022 15:04:09 +0800 Subject: [PATCH 27/29] Switched `fileobj` and `strict` in initializing `PdfWriter()` to keep the existing workflow Co-authored-by: Matthew Peveler --- PyPDF2/_merger.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/PyPDF2/_merger.py b/PyPDF2/_merger.py index 0adccf514..dd95b43da 100644 --- a/PyPDF2/_merger.py +++ b/PyPDF2/_merger.py @@ -83,7 +83,7 @@ class PdfMerger: Defaults to ``False``. """ - def __init__(self, fileobj: StrByteType = "", strict: bool = False) -> None: + def __init__(self, strict: bool = False, fileobj: StrByteType = "") -> None: self.inputs: List[Tuple[Any, PdfReader, bool]] = [] self.pages: List[Any] = [] self.output: Optional[PdfWriter] = PdfWriter() From d3e62ee81962c70d6bd5e5346ceb241cce859638 Mon Sep 17 00:00:00 2001 From: JianzhengLuo Date: Sat, 23 Jul 2022 15:06:53 +0800 Subject: [PATCH 28/29] Modified annoation to make the it match the arguments --- PyPDF2/_merger.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/PyPDF2/_merger.py b/PyPDF2/_merger.py index dd95b43da..8841686de 100644 --- a/PyPDF2/_merger.py +++ b/PyPDF2/_merger.py @@ -76,11 +76,11 @@ class PdfMerger: See the functions :meth:`merge()` (or :meth:`append()`) and :meth:`write()` for usage information. - :param fileobj: Output file. Can be a filename or any kind of - file-like object. :param bool strict: Determines whether user should be warned of all problems and also causes some correctable problems to be fatal. Defaults to ``False``. + :param fileobj: Output file. Can be a filename or any kind of + file-like object. """ def __init__(self, strict: bool = False, fileobj: StrByteType = "") -> None: From 59be94d69386ce7b907c11ec1b2ea7b756ab32d3 Mon Sep 17 00:00:00 2001 From: JianzhengLuo Date: Sun, 24 Jul 2022 15:40:55 +0800 Subject: [PATCH 29/29] Modified undefined name `fileobj` --- PyPDF2/_writer.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/PyPDF2/_writer.py b/PyPDF2/_writer.py index 1782fe810..08d08a4ee 100644 --- a/PyPDF2/_writer.py +++ b/PyPDF2/_writer.py @@ -817,19 +817,19 @@ def write(self, stream: StrByteType) -> None: """ my_file = False - if fileobj == "": - raise ValueError(f"Output(fileobj={fileobj}) is empty.") + if stream == "": + raise ValueError(f"Output(stream={stream}) is empty.") - if isinstance(fileobj, str): - fileobj = FileIO(fileobj, "wb") + if isinstance(stream, str): + stream = FileIO(stream, "wb") my_file = True - self.write_stream(fileobj) + self.write_stream(stream) if self.with_as_usage: - fileobj.close() + stream.close() - return my_file, fileobj + return my_file, stream def _write_header(self, stream: StreamType) -> List[int]: object_positions = []