Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add with ... as ... usage #1117

Closed
wants to merge 34 commits into from
Closed
Show file tree
Hide file tree
Changes from 30 commits
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
a2b7b55
Add `with ... as ...` usage (#1108)
JianzhengLuo Jul 15, 2022
2a88bd5
Update PyPDF2/_merger.py according to @MasterOdin's suggestions
JianzhengLuo Jul 15, 2022
b6d0351
Update PyPDF2/_merger.py according to @MasterOdin's suggestions
JianzhengLuo Jul 15, 2022
80813aa
Update PyPDF2/_merger.py according to @MasterOdin's suggestions
JianzhengLuo Jul 16, 2022
e6ec1f6
Update PyPDF2/_merger.py according to @MasterOdin's suggestions
JianzhengLuo Jul 16, 2022
386be5b
Update PyPDF2/_merger.py according to @MasterOdin's suggestions
JianzhengLuo Jul 16, 2022
31786bb
Sorry, I forgot to run before committing, so didn't notice that `Trac…
JianzhengLuo Jul 16, 2022
ae1bec0
Modify the wrong closing place that cause.
JianzhengLuo Jul 16, 2022
0b7c64e
Merge branch 'main' into add-with-as-usage-#1108
JianzhengLuo Jul 17, 2022
b695113
Modify PyPDF2/_writer.py according to @MasterOdin's suggestions
JianzhengLuo Jul 18, 2022
58797c2
Modify PyPDF2/_writer.py according to @MasterOdin's suggestions
JianzhengLuo Jul 18, 2022
1caa9ec
Modify PyPDF2/_writer.py according to @MasterOdin's suggestions
JianzhengLuo Jul 18, 2022
4fbe3cc
Modify PyPDF2/_writer.py according to @MasterOdin's suggestions
JianzhengLuo Jul 18, 2022
d598e8a
Modify PyPDF2/_writer.py according to @MasterOdin's suggestions
JianzhengLuo Jul 18, 2022
7ecf9ff
Merge branch 'add-with-as-usage-#1108' of https://github.com/Jianzhen…
JianzhengLuo Jul 18, 2022
562ebc7
Merge branch 'main' into add-with-as-usage-#1108
Jul 19, 2022
519dad1
Fix accident
JianzhengLuo Jul 21, 2022
336053a
Merge branch 'add-with-as-usage-#1108' of https://github.com/Jianzhen…
JianzhengLuo Jul 21, 2022
90af68a
Fix error raising while using half traditional usage
JianzhengLuo Jul 21, 2022
0f67658
Add a unit test (Problems still exist, please help.)
JianzhengLuo Jul 21, 2022
a6f973e
Update PyPDF2/_merger.py according to @MartinThoma's suggestions
JianzhengLuo Jul 21, 2022
580ad8c
Modify PyPDF2/_merger.py according to flake8
JianzhengLuo Jul 21, 2022
bf264bc
Modify to compatible with the existing usage
JianzhengLuo Jul 21, 2022
c705300
Modify to compatible with the with .. as ... usage
JianzhengLuo Jul 21, 2022
2940589
Removed a meaningless annotation
JianzhengLuo Jul 21, 2022
cda38ac
Fixed wrong test function names
JianzhengLuo Jul 21, 2022
0b18198
Fixed those I can fix only.
JianzhengLuo Jul 22, 2022
62f970f
Removed useless `else` section
JianzhengLuo Jul 22, 2022
d85c91b
Renamed argument name `fileobj` back to `stream` to keep the existing…
JianzhengLuo Jul 22, 2022
5cd3f3e
Modified annotation for `PdfWriter().write()` and `PdfWriter().write_s…
JianzhengLuo Jul 22, 2022
a7346ef
Switched `fileobj` and `strict` in initializing `PdfWriter()` to keep…
JianzhengLuo Jul 23, 2022
d3e62ee
Modified annotation to make it match the arguments
JianzhengLuo Jul 23, 2022
59be94d
Modified undefined name `fileobj`
JianzhengLuo Jul 24, 2022
092f209
Merge branch 'main' into add-with-as-usage-#1108
MartinThoma Aug 1, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 18 additions & 7 deletions PyPDF2/_merger.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
# POSSIBILITY OF SUCH DAMAGE.

from io import BytesIO, FileIO, IOBase
from typing import Any, Dict, Iterable, List, Optional, Tuple, Union, cast
from typing import Any, Dict, Iterable, List, Optional, Tuple, Union, cast, Type

from ._encryption import Encryption
from ._page import PageObject
Expand All @@ -51,6 +51,7 @@
)
from .pagerange import PageRange, PageRangeSpec
from .types import FitType, LayoutType, OutlinesType, PagemodeType, ZoomArgType
from types import TracebackType

ERR_CLOSED_WRITER = "close() was called and thus the writer cannot be used anymore"

Expand All @@ -75,20 +76,34 @@ class PdfMerger:
See the functions :meth:`merge()<merge>` (or :meth:`append()<append>`)
and :meth:`write()<write>` for usage information.

:param fileobj: Output file. Can be a filename or any kind of
file-like object.
:param bool strict: Determines whether user should be warned of all
problems and also causes some correctable problems to be fatal.
Defaults to ``False``.
"""

def __init__(self, strict: bool = False) -> None:
def __init__(self, fileobj: StrByteType = "", strict: bool = False) -> None:
JianzhengLuo marked this conversation as resolved.
Show resolved Hide resolved
self.inputs: List[Tuple[Any, PdfReader, bool]] = []
self.pages: List[Any] = []
self.output: Optional[PdfWriter] = PdfWriter()
self.bookmarks: OutlinesType = []
self.named_dests: List[Any] = []
self.id_count = 0
self.fileobj = fileobj
self.strict = strict

# There is nothing to do on entry: the merger itself is the managed object.
def __enter__(self) -> "PdfMerger":
    """Enter a ``with`` block and return this merger unchanged."""
    return self

def __exit__(
    self,
    exc_type: Optional[Type[BaseException]],
    exc: Optional[BaseException],
    traceback: Optional[TracebackType],
) -> None:
    """
    Write to the fileobj (if one was configured) and close the merger.

    :param exc_type: Type of the exception raised inside the ``with`` block,
        or ``None``. Not inspected.
    :param exc: The exception instance raised inside the ``with`` block, or
        ``None``. Not inspected.
    :param traceback: Traceback of that exception, or ``None``. Not inspected.
    :return: ``None``, so an exception raised inside the ``with`` block is
        never suppressed.
    """
    try:
        # An empty/falsy fileobj means no output target was configured, so
        # there is nothing to write on exit.
        if self.fileobj:
            self.write(self.fileobj)
    finally:
        # Close even when write() raises; otherwise the merger would be left
        # open after the ``with`` block has already ended.
        self.close()

def merge(
self,
position: int,
Expand Down Expand Up @@ -252,10 +267,6 @@ def write(self, fileobj: StrByteType) -> None:
"""
if self.output is None:
raise RuntimeError(ERR_CLOSED_WRITER)
my_file = False
if isinstance(fileobj, str):
fileobj = FileIO(fileobj, "wb")
my_file = True
JianzhengLuo marked this conversation as resolved.
Show resolved Hide resolved

# Add pages to the PdfWriter
# The commented out line below was replaced with the two lines below it
Expand All @@ -274,7 +285,7 @@ def write(self, fileobj: StrByteType) -> None:
self._write_bookmarks()

# Write the output to the file
self.output.write(fileobj)
my_file, fileobj = self.output.write(fileobj)

if my_file:
fileobj.close()
Expand Down
53 changes: 45 additions & 8 deletions PyPDF2/_writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@
Tuple,
Union,
cast,
Type
)

from PyPDF2.errors import PdfReadWarning
Expand All @@ -59,6 +60,7 @@
_get_max_pdf_version_header,
b_,
deprecate_with_replacement,
StrByteType
)
from .constants import AnnotationDictionaryAttributes
from .constants import CatalogAttributes as CA
Expand Down Expand Up @@ -107,6 +109,9 @@
ZoomArgsType,
ZoomArgType,
)
from io import FileIO
from types import TracebackType


logger = logging.getLogger(__name__)

Expand All @@ -121,7 +126,7 @@ class PdfWriter:
class (typically :class:`PdfReader<PyPDF2.PdfReader>`).
"""

def __init__(self) -> None:
def __init__(self, fileobj: StrByteType = "") -> None:
self._header = b"%PDF-1.3"
self._objects: List[Optional[PdfObject]] = [] # array of indirect objects
self._idnum_hash: Dict[bytes, IndirectObject] = {}
Expand Down Expand Up @@ -158,6 +163,19 @@ def __init__(self) -> None:
)
self._root: Optional[IndirectObject] = None
self._root_object = root
self.fileobj = fileobj
self.with_as_usage = False

# Let it know whether it is initialized by with ... as ... usage or not
def __enter__(self) -> "PdfWriter":
    """Record that the writer is driven by a ``with`` statement and return it."""
    self.with_as_usage = True
    return self

# On leaving the ``with`` block, flush the document to the configured output.
def __exit__(
    self,
    exc_type: Optional[Type[BaseException]],
    exc: Optional[BaseException],
    traceback: Optional[TracebackType],
) -> None:
    """
    Write the document to ``self.fileobj`` when one was configured.

    The exception arguments are accepted to satisfy the context-manager
    protocol but are not inspected; ``None`` is always returned, so an
    exception raised inside the ``with`` block is never suppressed.
    """
    target = self.fileobj
    if not target:
        # No output target configured: nothing to write.
        return
    self.write(target)

@property
def pdf_header(self) -> bytes:
Expand Down Expand Up @@ -763,13 +781,7 @@ def encrypt(
self._encrypt = self._add_object(encrypt)
self._encrypt_key = key

def write(self, stream: StreamType) -> None:
"""
Write the collection of pages added to this object out as a PDF file.

:param stream: An object to write the file to. The object must support
the write method and the tell method, similar to a file object.
"""
def write_stream(self, stream: StreamType) -> None:
if hasattr(stream, "mode") and "b" not in stream.mode:
warnings.warn(
f"File <{stream.name}> to write to is not in binary mode. " # type: ignore
Expand All @@ -794,6 +806,31 @@ def write(self, stream: StreamType) -> None:
self._write_trailer(stream)
stream.write(b_(f"\nstartxref\n{xref_location}\n%%EOF\n")) # eof

def write(self, stream: StrByteType) -> Tuple[bool, StreamType]:
    """
    Write the collection of pages added to this object out as a PDF file.

    :param stream: Where to write the file. Either a file path (``str``),
        which is opened in binary write mode, or an object that supports the
        write method and the tell method, similar to a file object. The
        argument keeps the name ``stream`` to preserve the existing workflow.
    :return: A tuple ``(my_file, stream)``. ``my_file`` is ``True`` when this
        method opened the stream itself from a path; in that case the
        returned stream has already been closed. Otherwise the caller's
        stream is returned untouched and still open.
    :raises ValueError: If ``stream`` is an empty string.
    """
    if stream == "":
        raise ValueError(f"Output(fileobj={stream}) is empty.")

    # Only a path makes the writer the owner of the underlying file.
    my_file = isinstance(stream, str)
    if my_file:
        stream = FileIO(stream, "wb")

    try:
        self.write_stream(stream)
    finally:
        # Close only what was opened here, even if writing failed. A stream
        # handed in by the caller stays open so the caller can keep using it
        # (e.g. read back an in-memory buffer).
        if my_file:
            stream.close()

    return my_file, stream

def _write_header(self, stream: StreamType) -> List[int]:
object_positions = []
stream.write(self.pdf_header + b"\n")
Expand Down
40 changes: 32 additions & 8 deletions tests/test_merger.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,14 +17,12 @@
sys.path.append(PROJECT_ROOT)


def test_merge():
def merger_operate(merger):
JianzhengLuo marked this conversation as resolved.
Show resolved Hide resolved
pdf_path = os.path.join(RESOURCE_ROOT, "crazyones.pdf")
outline = os.path.join(RESOURCE_ROOT, "pdflatex-outline.pdf")
pdf_forms = os.path.join(RESOURCE_ROOT, "pdflatex-forms.pdf")
pdf_pw = os.path.join(RESOURCE_ROOT, "libreoffice-writer-password.pdf")

merger = PyPDF2.PdfMerger()

# string path:
merger.append(pdf_path)
merger.append(outline)
Expand Down Expand Up @@ -78,10 +76,6 @@ def test_merge():
merger.set_page_layout("/SinglePage")
merger.set_page_mode("/UseThumbs")

tmp_path = "dont_commit_merged.pdf"
merger.write(tmp_path)
merger.close()

# Check if bookmarks are correct
reader = PyPDF2.PdfReader(tmp_path)
assert [
Expand All @@ -102,7 +96,37 @@ def test_merge():

# TODO: There seem to be no destinations for those links?

# Clean up

# Scratch output file shared by the merger tests below.
tmp_path = "dont_commit_merged.pdf"


def test_merger_operations_by_totally_traditional_usage():
    """Drive a PdfMerger without ``with``: explicit write() and close()."""
    merger = PdfMerger()
    try:
        merger_operate(merger)

        merger.write(tmp_path)
        merger.close()

        # The output must really exist (previously only implied by
        # os.remove() failing on a missing file).
        assert os.path.isfile(tmp_path)
    finally:
        # cleanup, even when an operation or assertion above fails
        if os.path.exists(tmp_path):
            os.remove(tmp_path)


def test_merger_operations_by_semi_traditional_usage():
    """Use ``with PdfMerger()`` for closing, but still call write() explicitly."""
    try:
        with PdfMerger() as merger:
            merger_operate(merger)

            # Must happen inside the block: leaving it closes the merger.
            merger.write(tmp_path)

        # The output must really exist (previously only implied by
        # os.remove() failing on a missing file).
        assert os.path.isfile(tmp_path)
    finally:
        # cleanup, even when an operation or assertion above fails
        if os.path.exists(tmp_path):
            os.remove(tmp_path)


def test_merger_operation_by_totally_new_usage():
    """Pass the output path to PdfMerger and let ``__exit__`` write and close."""
    try:
        # NOTE(review): merger_operate() appears to open tmp_path itself while
        # the output is only written on exit here -- confirm the ordering.
        with PdfMerger(tmp_path) as merger:
            merger_operate(merger)

        # Leaving the block is what writes the file; make that explicit.
        assert os.path.isfile(tmp_path)
    finally:
        # cleanup, even when an operation or assertion above fails
        if os.path.exists(tmp_path):
            os.remove(tmp_path)


Expand Down
58 changes: 47 additions & 11 deletions tests/test_writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,20 +24,16 @@ def test_writer_clone():
assert len(writer.pages) == 4


def test_writer_operations():
def writer_operate(writer):
"""
This test just checks if the operation throws an exception.

This should be done way more thoroughly: It should be checked if the
output is as expected.
To test the writer that initialized by each of the four usages.
"""
pdf_path = os.path.join(RESOURCE_ROOT, "crazyones.pdf")
pdf_outline_path = os.path.join(RESOURCE_ROOT, "pdflatex-outline.pdf")

reader = PdfReader(pdf_path)
reader_outline = PdfReader(pdf_outline_path)

writer = PdfWriter()
page = reader.pages[0]
with pytest.raises(PageSizeNotDefinedError) as exc:
writer.add_blank_page()
Expand Down Expand Up @@ -80,17 +76,57 @@ def test_writer_operations():

writer.add_attachment("foobar.gif", b"foobarcontent")

# finally, write "output" to PyPDF2-output.pdf
tmp_path = "dont_commit_writer.pdf"
with open(tmp_path, "wb") as output_stream:
writer.write(output_stream)

# Check that every key in _idnum_hash is correct
objects_hash = [o.hash_value() for o in writer._objects]
for k, v in writer._idnum_hash.items():
assert v.pdf == writer
assert k in objects_hash, "Missing %s" % v


# Scratch output file shared by the writer tests below.
tmp_path = "dont_commit_writer.pdf"


def test_writer_operations_by_totally_traditional_usage():
    """Drive a PdfWriter without ``with`` and write to a caller-opened stream."""
    writer = PdfWriter()
    try:
        writer_operate(writer)

        # finally, write the output to tmp_path through our own file object
        with open(tmp_path, "wb") as output_stream:
            writer.write(output_stream)
    finally:
        # cleanup, even when an operation above fails
        if os.path.exists(tmp_path):
            os.remove(tmp_path)


def test_writer_operations_by_semi_traditional_usage():
    """Use ``with PdfWriter()`` but still write to a caller-opened stream."""
    try:
        with PdfWriter() as writer:
            writer_operate(writer)

            # finally, write the output to tmp_path through our own file object
            with open(tmp_path, "wb") as output_stream:
                writer.write(output_stream)
    finally:
        # cleanup, even when an operation above fails
        if os.path.exists(tmp_path):
            os.remove(tmp_path)


def test_writer_operations_by_semi_new_traditional_usage():
    """Use ``with PdfWriter()`` and pass a file path straight to write()."""
    try:
        with PdfWriter() as writer:
            writer_operate(writer)

            # finally, write the output to tmp_path, given as a plain path
            writer.write(tmp_path)

        # The output must really exist (previously only implied by
        # os.remove() failing on a missing file).
        assert os.path.isfile(tmp_path)
    finally:
        # cleanup, even when an operation or assertion above fails
        if os.path.exists(tmp_path):
            os.remove(tmp_path)


def test_writer_operation_by_totally_new_usage():
    """Pass the output path to PdfWriter and let ``__exit__`` do the writing."""
    try:
        # Leaving the block is what writes the output to tmp_path.
        with PdfWriter(tmp_path) as writer:
            writer_operate(writer)

        # The output must really exist (previously only implied by
        # os.remove() failing on a missing file).
        assert os.path.isfile(tmp_path)
    finally:
        # cleanup, even when an operation or assertion above fails
        if os.path.exists(tmp_path):
            os.remove(tmp_path)

Expand Down