Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

TST: Increase Test coverage #756

Merged
merged 13 commits into from
Apr 15, 2022
12 changes: 9 additions & 3 deletions PyPDF2/filters.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
# vim: sw=4:expandtab:foldmethod=marker
#
# Copyright (c) 2006, Mathieu Fenniak
# All rights reserved.
#
Expand Down Expand Up @@ -40,7 +38,7 @@
from cStringIO import StringIO
else:
from io import StringIO
import struct
import struct

try:
import zlib
Expand Down Expand Up @@ -356,6 +354,10 @@ def decode(data, decodeParms=None):
class CCITTFaxDecode(object):
def decode(data, decodeParms=None, height=0):
if decodeParms:
from PyPDF2.generic import ArrayObject
if isinstance(decodeParms, ArrayObject):
if len(decodeParms) == 1:
decodeParms = decodeParms[0]
if decodeParms.get("/K", 1) == -1:
CCITTgroup = 4
else:
Expand Down Expand Up @@ -451,6 +453,10 @@ def _xobj_to_image(x_object_obj):
img_byte_arr = io.BytesIO()
img.save(img_byte_arr, format="PNG")
data = img_byte_arr.getvalue()
elif x_object_obj["/Filter"] in (["/LZWDecode"], ['/ASCII85Decode'], ['/CCITTFaxDecode']):
from PyPDF2.utils import b_
extension = ".png"
data = b_(data)
elif x_object_obj["/Filter"] == "/DCTDecode":
extension = ".jpg"
elif x_object_obj["/Filter"] == "/JPXDecode":
Expand Down
49 changes: 25 additions & 24 deletions PyPDF2/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,8 @@
import decimal
import codecs

from PyPDF2.utils import ERR_STREAM_TRUNCATED_PREMATURELY

ObjectPrefix = b_('/<[tf(n%')
NumberSigns = b_('+-')
IndirectPattern = re.compile(b_(r"[+-]?(\d+)\s+(\d+)\s+R[^a-zA-Z]"))
Expand Down Expand Up @@ -199,17 +201,15 @@ def readFromStream(stream, pdf):
while True:
tok = stream.read(1)
if not tok:
# stream has truncated prematurely
raise PdfStreamError("Stream has ended unexpectedly")
raise PdfStreamError(ERR_STREAM_TRUNCATED_PREMATURELY)
if tok.isspace():
break
idnum += tok
generation = b_("")
while True:
tok = stream.read(1)
if not tok:
# stream has truncated prematurely
raise PdfStreamError("Stream has ended unexpectedly")
raise PdfStreamError(ERR_STREAM_TRUNCATED_PREMATURELY)
if tok.isspace():
if not generation:
continue
Expand Down Expand Up @@ -273,10 +273,11 @@ def readFromStream(stream):
readFromStream = staticmethod(readFromStream)


##
# Given a string (either a "str" or "unicode"), create a ByteStringObject or a
# TextStringObject to represent the string.
def createStringObject(string):
"""
Given a string (either a "str" or "unicode"), create a ByteStringObject or a
TextStringObject to represent the string.
"""
if isinstance(string, utils.string_type):
return TextStringObject(string)
elif isinstance(string, utils.bytes_type):
Expand Down Expand Up @@ -306,8 +307,7 @@ def readHexStringFromStream(stream):
while True:
tok = readNonWhitespace(stream)
if not tok:
# stream has truncated prematurely
raise PdfStreamError("Stream has ended unexpectedly")
raise PdfStreamError(ERR_STREAM_TRUNCATED_PREMATURELY)
if tok == b_(">"):
break
x += tok
Expand All @@ -328,8 +328,7 @@ def readStringFromStream(stream):
while True:
tok = stream.read(1)
if not tok:
# stream has truncated prematurely
raise PdfStreamError("Stream has ended unexpectedly")
raise PdfStreamError(ERR_STREAM_TRUNCATED_PREMATURELY)
if tok == b_("("):
parens += 1
elif tok == b_(")"):
Expand Down Expand Up @@ -392,16 +391,17 @@ def readStringFromStream(stream):
return createStringObject(txt)


##
# Represents a string object where the text encoding could not be determined.
# This occurs quite often, as the PDF spec doesn't provide an alternate way to
# represent strings -- for example, the encryption data stored in files (like
# /O) is clearly not text, but is still stored in a "String" object.
class ByteStringObject(utils.bytes_type, PdfObject):
"""
Represents a string object where the text encoding could not be determined.
This occurs quite often, as the PDF spec doesn't provide an alternate way to
represent strings -- for example, the encryption data stored in files (like
/O) is clearly not text, but is still stored in a "String" object.
"""

##
# For compatibility with TextStringObject.original_bytes. This method
# returns self.
# self.
original_bytes = property(lambda self: self)

def writeToStream(self, stream, encryption_key):
Expand All @@ -413,12 +413,14 @@ def writeToStream(self, stream, encryption_key):
stream.write(b_(">"))


##
# Represents a string object that has been decoded into a real unicode string.
# If read from a PDF document, this string appeared to match the
# PDFDocEncoding, or contained a UTF-16BE BOM mark to cause UTF-16 decoding to
# occur.
class TextStringObject(utils.string_type, PdfObject):
"""
Represents a string object that has been decoded into a real unicode string.
If read from a PDF document, this string appeared to match the
PDFDocEncoding, or contained a UTF-16BE BOM mark to cause UTF-16 decoding to
occur.
"""

autodetect_pdfdocencoding = False
autodetect_utf16 = False

Expand Down Expand Up @@ -569,8 +571,7 @@ def readFromStream(stream, pdf):
skipOverComment(stream)
continue
if not tok:
# stream has truncated prematurely
raise PdfStreamError("Stream has ended unexpectedly")
raise PdfStreamError(ERR_STREAM_TRUNCATED_PREMATURELY)

if debug: print(("Tok:", tok))
if tok == b_(">"):
Expand Down
2 changes: 0 additions & 2 deletions PyPDF2/merger.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
# vim: sw=4:expandtab:foldmethod=marker
#
# Copyright (c) 2006, Mathieu Fenniak
# All rights reserved.
#
Expand Down
9 changes: 1 addition & 8 deletions PyPDF2/pdf.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
# -*- coding: utf-8 -*-
#
# vim: sw=4:expandtab:foldmethod=marker
#
# Copyright (c) 2006, Mathieu Fenniak
# Copyright (c) 2007, Ashish Kulkarni <[email protected]>
#
Expand Down Expand Up @@ -1637,7 +1635,7 @@ def _getObjectFromStream(self, indirectReference):
streamData.seek(0, 0)
lines = streamData.readlines()
for i in range(0, len(lines)):
print((lines[i]))
print(lines[i])
streamData.seek(pos, 0)
try:
obj = readObject(streamData, self)
Expand Down Expand Up @@ -2588,11 +2586,6 @@ def mergeRotatedScaledTranslatedPage(self, page2, rotation, scale, tx, ty, expan
ctm[1][0], ctm[1][1],
ctm[2][0], ctm[2][1]], expand)

##
# Applys a transformation matrix the page.
#
# @param ctm A 6 elements tuple containing the operands of the
# transformation matrix
def addTransformation(self, ctm):
"""
Applies a transformation matrix to the page.
Expand Down
7 changes: 3 additions & 4 deletions PyPDF2/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@
except ImportError: # Py3
import builtins


ERR_STREAM_TRUNCATED_PREMATURELY = "Stream has ended unexpectedly"
xrange_fn = getattr(builtins, "xrange", range)
_basestring = getattr(builtins, "basestring", str)

Expand Down Expand Up @@ -122,7 +122,7 @@ def skipOverComment(stream):
def readUntilRegex(stream, regex, ignore_eof=False):
"""
Reads until the regular expression pattern is matched (the match itself is ignored).
Raise PdfStreamError on premature end-of-file.
:raises PdfStreamError: on premature end-of-file
:param bool ignore_eof: If true, ignore end-of-file and return immediately
"""
name = b_('')
Expand All @@ -133,7 +133,7 @@ def readUntilRegex(stream, regex, ignore_eof=False):
if ignore_eof:
return name
else:
raise PdfStreamError("Stream has ended unexpectedly")
raise PdfStreamError(ERR_STREAM_TRUNCATED_PREMATURELY)
m = regex.search(tok)
if m is not None:
name += tok[:m.start()]
Expand Down Expand Up @@ -242,7 +242,6 @@ def b_(s):
bc[s] = r
return r
except Exception:
print(s)
r = s.encode('utf-8')
if len(s) < 2:
bc[s] = r
Expand Down
Binary file added Resources/imagemagick-ASCII85Decode.pdf
Binary file not shown.
Binary file added Resources/imagemagick-CCITTFaxDecode.pdf
Binary file not shown.
Binary file added Resources/imagemagick-images.pdf
Binary file not shown.
Binary file added Resources/imagemagick-lzw.pdf
Binary file not shown.
Binary file added Resources/metadata.pdf
Binary file not shown.
34 changes: 17 additions & 17 deletions Tests/test_basic_features.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,60 +2,60 @@

import pytest

from PyPDF2 import PdfFileWriter, PdfFileReader
from PyPDF2.utils import PdfReadError
from PyPDF2 import PdfFileReader, PdfFileWriter
from PyPDF2.pdf import convertToInt
from PyPDF2.utils import PdfReadError

TESTS_ROOT = os.path.abspath(os.path.dirname(__file__))
PROJECT_ROOT = os.path.dirname(TESTS_ROOT)
RESOURCE_ROOT = os.path.join(PROJECT_ROOT, "Resources")


def test_basic_features():
output = PdfFileWriter()
document1 = os.path.join(RESOURCE_ROOT, "crazyones.pdf")
input1 = PdfFileReader(document1)
writer = PdfFileWriter()
pdf_path = os.path.join(RESOURCE_ROOT, "crazyones.pdf")
reader = PdfFileReader(pdf_path)

# print how many pages input1 has:
print("document1.pdf has %d pages." % input1.getNumPages())
print("document1.pdf has %d pages." % reader.getNumPages())

# add page 1 from input1 to output document, unchanged
output.addPage(input1.getPage(0))
writer.addPage(reader.getPage(0))

# add page 2 from input1, but rotated clockwise 90 degrees
output.addPage(input1.getPage(0).rotateClockwise(90))
writer.addPage(reader.getPage(0).rotateClockwise(90))

# add page 3 from input1, rotated the other way:
output.addPage(input1.getPage(0).rotateCounterClockwise(90))
writer.addPage(reader.getPage(0).rotateCounterClockwise(90))
# alt: output.addPage(input1.getPage(0).rotateClockwise(270))

# add page 4 from input1, but first add a watermark from another PDF:
page4 = input1.getPage(0)
watermark_pdf = document1
page4 = reader.getPage(0)
watermark_pdf = pdf_path
watermark = PdfFileReader(watermark_pdf)
page4.mergePage(watermark.getPage(0))
output.addPage(page4)
writer.addPage(page4)

# add page 5 from input1, but crop it to half size:
page5 = input1.getPage(0)
page5 = reader.getPage(0)
page5.mediaBox.upperRight = (
page5.mediaBox.getUpperRight_x() / 2,
page5.mediaBox.getUpperRight_y() / 2,
)
output.addPage(page5)
writer.addPage(page5)

# add some Javascript to launch the print window on opening this PDF.
# the password dialog may prevent the print dialog from being shown,
# comment out the encryption lines, if that's the case, to try this out
output.addJS("this.print({bUI:true,bSilent:false,bShrinkToFit:true});")
writer.addJS("this.print({bUI:true,bSilent:false,bShrinkToFit:true});")

# encrypt your new PDF and add a password
password = "secret"
output.encrypt(password)
writer.encrypt(password)

# finally, write "output" to PyPDF2-output.pdf
with open("PyPDF2-output.pdf", "wb") as outputStream:
output.write(outputStream)
writer.write(outputStream)


def test_convertToInt():
Expand Down
36 changes: 21 additions & 15 deletions Tests/test_javascript.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import os

import pytest

from PyPDF2 import PdfFileReader, PdfFileWriter
Expand All @@ -8,21 +9,28 @@
PROJECT_ROOT = os.path.dirname(TESTS_ROOT)
RESOURCE_ROOT = os.path.join(PROJECT_ROOT, "Resources")


@pytest.fixture
def pdf_file_writer():
ipdf = PdfFileReader(os.path.join(RESOURCE_ROOT, "crazyones.pdf"))
reader = PdfFileReader(os.path.join(RESOURCE_ROOT, "crazyones.pdf"))
pdf_file_writer = PdfFileWriter()
pdf_file_writer.appendPagesFromReader(ipdf)
pdf_file_writer.appendPagesFromReader(reader)
yield pdf_file_writer


def test_add_js(pdf_file_writer):
pdf_file_writer.addJS(
"this.print({bUI:true,bSilent:false,bShrinkToFit:true});"
)
pdf_file_writer.addJS("this.print({bUI:true,bSilent:false,bShrinkToFit:true});")

assert (
"/Names" in pdf_file_writer._root_object
), "addJS should add a name catalog in the root object."
assert (
"/JavaScript" in pdf_file_writer._root_object["/Names"]
), "addJS should add a JavaScript name tree under the name catalog."
assert (
"/OpenAction" in pdf_file_writer._root_object
), "addJS should add an OpenAction to the catalog."

assert "/Names" in pdf_file_writer._root_object, "addJS should add a name catalog in the root object."
assert "/JavaScript" in pdf_file_writer._root_object["/Names"], "addJS should add a JavaScript name tree under the name catalog."
assert "/OpenAction" in pdf_file_writer._root_object, "addJS should add an OpenAction to the catalog."

def test_overwrite_js(pdf_file_writer):
def get_javascript_name():
Expand All @@ -31,14 +39,12 @@ def get_javascript_name():
assert "/Names" in pdf_file_writer._root_object["/Names"]["/JavaScript"]
return pdf_file_writer._root_object["/Names"]["/JavaScript"]["/Names"][0]

pdf_file_writer.addJS(
"this.print({bUI:true,bSilent:false,bShrinkToFit:true});"
)
pdf_file_writer.addJS("this.print({bUI:true,bSilent:false,bShrinkToFit:true});")
first_js = get_javascript_name()

pdf_file_writer.addJS(
"this.print({bUI:true,bSilent:false,bShrinkToFit:true});"
)
pdf_file_writer.addJS("this.print({bUI:true,bSilent:false,bShrinkToFit:true});")
second_js = get_javascript_name()

assert first_js != second_js, "addJS should overwrite the previous script in the catalog."
assert (
first_js != second_js
), "addJS should overwrite the previous script in the catalog."
Loading