From 8264d3aff28f87debe2b4b18dae7f416a87282cf Mon Sep 17 00:00:00 2001 From: stefan6419846 <96178532+stefan6419846@users.noreply.github.com> Date: Tue, 22 Oct 2024 20:08:30 +0200 Subject: [PATCH 1/8] DEV: Update pinned requirements --- requirements/ci-3.11.txt | 78 ++++++++++++++++------------------ requirements/ci.in | 4 +- requirements/ci.txt | 91 +++++++++++++++------------------------- requirements/dev.txt | 19 +++------ 4 files changed, 75 insertions(+), 117 deletions(-) diff --git a/requirements/ci-3.11.txt b/requirements/ci-3.11.txt index 210177118..d8ff0d550 100644 --- a/requirements/ci-3.11.txt +++ b/requirements/ci-3.11.txt @@ -2,56 +2,45 @@ # This file is autogenerated by pip-compile with Python 3.11 # by the following command: # -# pip-compile --config=pyproject.toml --output-file=requirements/ci-3.11.txt requirements/ci.in +# pip-compile --output-file=requirements/ci-3.11.txt requirements/ci.in # -attrs==23.1.0 - # via flake8-bugbear -coverage[toml]==7.6.0 +cffi==1.17.1 + # via cryptography +coverage[toml]==7.6.4 # via # -r requirements/ci.in # pytest-cov -execnet==2.0.2 - # via pytest-xdist -flake8==6.1.0 - # via - # -r requirements/ci.in - # flake8-bugbear - # flake8-print -flake8-bugbear==23.7.10 +cryptography==43.0.3 # via -r requirements/ci.in -flake8-implicit-str-concat==0.4.0 - # via -r requirements/ci.in -flake8-print==5.0.0 - # via -r requirements/ci.in -fpdf2==2.4.1 +defusedxml==0.7.1 + # via fpdf2 +exceptiongroup==1.2.2 + # via pytest +execnet==2.1.1 + # via pytest-xdist +fonttools==4.54.1 + # via fpdf2 +fpdf2==2.8.1 # via -r requirements/ci.in iniconfig==2.0.0 # via pytest -mccabe==0.7.0 - # via flake8 -mypy==1.5.1 +mypy==1.12.1 # via -r requirements/ci.in mypy-extensions==1.0.0 # via mypy -packaging==23.1 +packaging==24.1 # via pytest -pillow==10.4.0 +pillow==11.0.0 # via # -r requirements/ci.in # fpdf2 -pluggy==1.2.0 +pluggy==1.5.0 # via pytest py-cpuinfo==9.0.0 # via pytest-benchmark -pycodestyle==2.11.0 - # via - # flake8 - # 
flake8-print -pycryptodome==3.18.0 - # via -r requirements/ci.in -pyflakes==3.1.0 - # via flake8 -pytest==7.4.0 +pycparser==2.22 + # via cffi +pytest==8.3.3 # via # -r requirements/ci.in # pytest-benchmark @@ -61,25 +50,28 @@ pytest==7.4.0 # pytest-xdist pytest-benchmark==4.0.0 # via -r requirements/ci.in -pytest-cov==4.1.0 +pytest-cov==5.0.0 # via -r requirements/ci.in -pytest-socket==0.6.0 +pytest-socket==0.7.0 # via -r requirements/ci.in -pytest-timeout==2.1.0 +pytest-timeout==2.3.1 # via -r requirements/ci.in -pytest-xdist==3.3.1 +pytest-xdist==3.6.1 # via -r requirements/ci.in -pyyaml==6.0.1 +pyyaml==6.0.2 # via -r requirements/ci.in -ruff==0.0.290 +ruff==0.7.0 # via -r requirements/ci.in -typeguard==4.1.2 - # via -r requirements/ci.in -types-dataclasses==0.6.6 +tomli==2.0.2 + # via + # coverage + # mypy + # pytest +typeguard==4.3.0 # via -r requirements/ci.in -types-pillow==10.0.0.2 +types-pillow==10.2.0.20240822 # via -r requirements/ci.in -typing-extensions==4.7.1 +typing-extensions==4.12.2 # via # mypy # typeguard diff --git a/requirements/ci.in b/requirements/ci.in index a7f8bc7d6..50b58cd3d 100644 --- a/requirements/ci.in +++ b/requirements/ci.in @@ -1,8 +1,8 @@ coverage -fpdf2==2.4.1 +fpdf2 mypy pillow -pycryptodome +cryptography pytest pytest-benchmark pytest-socket diff --git a/requirements/ci.txt b/requirements/ci.txt index 63c527ce4..a65985129 100644 --- a/requirements/ci.txt +++ b/requirements/ci.txt @@ -1,68 +1,48 @@ # -# This file is autogenerated by pip-compile with python 3.7 -# To update, run: +# This file is autogenerated by pip-compile with Python 3.8 +# by the following command: # # pip-compile requirements/ci.in # -attrs==23.1.0 - # via flake8-bugbear -coverage[toml]==7.2.7 +cffi==1.17.1 + # via cryptography +coverage[toml]==7.6.1 # via # -r requirements/ci.in # pytest-cov -exceptiongroup==1.2.0 +cryptography==43.0.3 + # via -r requirements/ci.in +defusedxml==0.7.1 + # via fpdf2 +exceptiongroup==1.2.2 # via pytest -execnet==2.0.2 
+execnet==2.1.1 # via pytest-xdist -flake8==5.0.4 - # via - # -r requirements/ci.in - # flake8-bugbear - # flake8-print -flake8-bugbear==23.3.12 - # via -r requirements/ci.in -flake8-implicit-str-concat==0.4.0 +fonttools==4.54.1 + # via fpdf2 +fpdf2==2.8.1 # via -r requirements/ci.in -flake8-print==5.0.0 - # via -r requirements/ci.in -fpdf2==2.4.1 - # via -r requirements/ci.in -importlib-metadata==4.2.0 - # via - # attrs - # flake8 - # pluggy - # pytest - # typeguard +importlib-metadata==8.5.0 + # via typeguard iniconfig==2.0.0 # via pytest -mccabe==0.7.0 - # via flake8 -more-itertools==9.1.0 - # via flake8-implicit-str-concat -mypy==1.4.1 +mypy==1.12.1 # via -r requirements/ci.in mypy-extensions==1.0.0 # via mypy -packaging==23.2 +packaging==24.1 # via pytest -pillow==9.5.0 +pillow==10.4.0 # via # -r requirements/ci.in # fpdf2 -pluggy==1.2.0 +pluggy==1.5.0 # via pytest py-cpuinfo==9.0.0 # via pytest-benchmark -pycodestyle==2.9.1 - # via - # flake8 - # flake8-print -pycryptodome==3.19.0 - # via -r requirements/ci.in -pyflakes==2.5.0 - # via flake8 -pytest==7.4.3 +pycparser==2.22 + # via cffi +pytest==8.3.3 # via # -r requirements/ci.in # pytest-benchmark @@ -72,33 +52,28 @@ pytest==7.4.3 # pytest-xdist pytest-benchmark==4.0.0 # via -r requirements/ci.in -pytest-cov==4.1.0 +pytest-cov==5.0.0 # via -r requirements/ci.in -pytest-socket==0.6.0 +pytest-socket==0.7.0 # via -r requirements/ci.in -pytest-timeout==2.2.0 +pytest-timeout==2.3.1 # via -r requirements/ci.in -pytest-xdist==3.5.0 +pytest-xdist==3.6.1 # via -r requirements/ci.in -pyyaml==6.0.1 +pyyaml==6.0.2 # via -r requirements/ci.in -tomli==2.0.1 +tomli==2.0.2 # via # coverage # mypy # pytest -typed-ast==1.5.5 - # via mypy -typeguard==4.1.2 - # via -r requirements/ci.in -types-dataclasses==0.6.6 +typeguard==4.3.0 # via -r requirements/ci.in -types-pillow==10.1.0.2 +types-pillow==10.2.0.20240822 # via -r requirements/ci.in -typing-extensions==4.7.1 +typing-extensions==4.12.2 # via - # importlib-metadata # mypy # 
typeguard -zipp==3.15.0 +zipp==3.20.2 # via importlib-metadata diff --git a/requirements/dev.txt b/requirements/dev.txt index 772f31b06..8935fcafa 100644 --- a/requirements/dev.txt +++ b/requirements/dev.txt @@ -1,6 +1,6 @@ # -# This file is autogenerated by pip-compile with python 3.7 -# To update, run: +# This file is autogenerated by pip-compile with Python 3.8 +# by the following command: # # pip-compile requirements/dev.in # @@ -37,12 +37,7 @@ identify==2.5.24 idna==3.6 # via requests importlib-metadata==6.7.0 - # via - # build - # click - # pre-commit - # pytest - # virtualenv + # via build iniconfig==2.0.0 # via pytest mypy-extensions==1.0.0 @@ -86,16 +81,12 @@ tomli==2.0.1 # build # coverage # pip-tools + # pyproject-hooks # pytest tomli-w==1.0.0 # via flit -typed-ast==1.5.5 - # via black typing-extensions==4.7.1 - # via - # black - # importlib-metadata - # platformdirs + # via black urllib3==2.0.7 # via requests virtualenv==20.25.0 From 1c38788fe2c79f72dc270d4cb1b3784ef3434595 Mon Sep 17 00:00:00 2001 From: stefan6419846 <96178532+stefan6419846@users.noreply.github.com> Date: Tue, 22 Oct 2024 20:12:23 +0200 Subject: [PATCH 2/8] use new ruff command --- .github/workflows/github-ci.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/github-ci.yaml b/.github/workflows/github-ci.yaml index fe7ff74d2..eb5b71d5e 100644 --- a/.github/workflows/github-ci.yaml +++ b/.github/workflows/github-ci.yaml @@ -163,7 +163,7 @@ jobs: - name: Test with ruff run: | echo `ruff --version` - ruff . + ruff check . 
- name: Test with mypy run : | mypy pypdf From 1cc45913cb0ae13b96c3ab71ebf045cb18d583d6 Mon Sep 17 00:00:00 2001 From: stefan6419846 <96178532+stefan6419846@users.noreply.github.com> Date: Tue, 22 Oct 2024 20:23:00 +0200 Subject: [PATCH 3/8] apply safe ruff fixes --- docs/conf.py | 2 +- make_release.py | 13 ++- mutmut_config.py | 1 + pypdf/_cmap.py | 2 + pypdf/_codecs/_codecs.py | 2 + pypdf/_doc_common.py | 14 +++ pypdf/_encryption.py | 13 +++ pypdf/_page.py | 34 +++++- pypdf/_page_labels.py | 4 + pypdf/_reader.py | 10 ++ pypdf/_text_extraction/__init__.py | 1 + .../_layout_mode/_fixed_width_page.py | 6 + pypdf/_text_extraction/_layout_mode/_font.py | 1 + .../_layout_mode/_text_state_manager.py | 4 + .../_layout_mode/_text_state_params.py | 2 + pypdf/_utils.py | 8 ++ pypdf/_writer.py | 44 +++++++- pypdf/_xobj_image_helpers.py | 1 + pypdf/annotations/_markup_annotations.py | 2 + pypdf/constants.py | 4 + pypdf/filters.py | 10 ++ pypdf/generic/_base.py | 13 +++ pypdf/generic/_data_structures.py | 15 +++ pypdf/generic/_fit.py | 5 + pypdf/generic/_utils.py | 1 + pypdf/pagerange.py | 3 + pypdf/xmp.py | 2 + pyproject.toml | 8 +- tests/__init__.py | 2 + tests/bench.py | 4 +- tests/test_cmap.py | 32 +++--- tests/test_encryption.py | 2 +- tests/test_filters.py | 60 +++++----- tests/test_generic.py | 22 ++-- tests/test_images.py | 36 +++--- tests/test_javascript.py | 2 +- tests/test_merger.py | 36 +++--- tests/test_page.py | 48 ++++---- tests/test_page_labels.py | 6 +- tests/test_pdfa.py | 2 +- tests/test_reader.py | 104 +++++++++--------- tests/test_text_extraction.py | 14 +-- tests/test_workflows.py | 68 ++++++------ tests/test_writer.py | 85 +++++++------- tests/test_xmp.py | 14 +-- tests/test_xobject_image_helpers.py | 2 +- 46 files changed, 488 insertions(+), 276 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index 82672f35d..6b2b0e74a 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -18,7 +18,7 @@ sys.path.insert(0, os.path.abspath(".")) sys.path.insert(0, 
os.path.abspath("../")) -import pypdf as py_pkg # noqa: E402 +import pypdf as py_pkg shutil.copyfile("../CHANGELOG.md", "meta/CHANGELOG.md") shutil.copyfile("../CONTRIBUTORS.md", "meta/CONTRIBUTORS.md") diff --git a/make_release.py b/make_release.py index 7f05cb844..2e2a98bbc 100644 --- a/make_release.py +++ b/make_release.py @@ -30,6 +30,7 @@ def main(changelog_path: str) -> None: Args: changelog_path: The location of the CHANGELOG file + """ changelog = get_changelog(changelog_path) git_tag = get_most_recent_git_tag() @@ -66,7 +67,7 @@ def print_instructions(new_version: str) -> None: print("=" * 80) print(f"☑ {VERSION_FILE_PATH} was adjusted to '{new_version}'") print(f"☑ {CHANGELOG_FILE_PATH} was adjusted") - print("") + print() print("Now run:") print(" git commit -eF RELEASE_COMMIT_MSG.md") print(" git push") @@ -149,6 +150,7 @@ def version_bump(git_tag: str) -> str: Returns: The new version where the patch version is bumped. + """ # just assume a patch version change major, minor, patch = git_tag.split(".") @@ -164,6 +166,7 @@ def get_changelog(changelog_path: str) -> str: Returns: Data of the CHANGELOG + """ with open(changelog_path, encoding="utf-8") as fh: changelog = fh.read() @@ -177,6 +180,7 @@ def write_changelog(new_changelog: str, changelog_path: str) -> None: Args: new_changelog: Contents of the new CHANGELOG changelog_path: Path where the CHANGELOG file is + """ with open(changelog_path, "w", encoding="utf-8") as fh: fh.write(new_changelog) @@ -191,6 +195,7 @@ def get_formatted_changes(git_tag: str) -> Tuple[str, str]: Returns: Changes done since git_tag + """ commits = get_git_commits_since_tag(git_tag) @@ -266,6 +271,7 @@ def get_most_recent_git_tag() -> str: Returns: Most recently created git tag. + """ git_tag = str( subprocess.check_output( @@ -285,12 +291,13 @@ def get_author_mapping(line_count: int) -> Dict[str, str]: Returns: A mapping of long commit hashes to author login handles. 
+ """ per_page = min(line_count, 100) page = 1 mapping: Dict[str, str] = {} for _ in range(0, line_count, per_page): - with urllib.request.urlopen( # noqa: S310 + with urllib.request.urlopen( f"https://api.github.com/repos/{GH_ORG}/{GH_PROJECT}/commits?per_page={per_page}&page={page}" ) as response: commits = json.loads(response.read()) @@ -310,6 +317,7 @@ def get_git_commits_since_tag(git_tag: str) -> List[Change]: Returns: List of all changes since git_tag. + """ commits = ( subprocess.check_output( @@ -342,6 +350,7 @@ def parse_commit_line(line: str, authors: Dict[str, str]) -> Change: Raises: ValueError: The commit line is not well-structured + """ parts = line.strip().strip('"\\').split(":::") if len(parts) != 3: diff --git a/mutmut_config.py b/mutmut_config.py index 56d149ac7..ae08b2338 100644 --- a/mutmut_config.py +++ b/mutmut_config.py @@ -13,6 +13,7 @@ def pre_mutation(context: Context) -> None: Args: context: A mutmut Context object + """ line = context.current_source_line.strip() if ( diff --git a/pypdf/_cmap.py b/pypdf/_cmap.py index 52a7b47b8..e8e23f9ab 100644 --- a/pypdf/_cmap.py +++ b/pypdf/_cmap.py @@ -27,6 +27,7 @@ def build_char_map( Returns: Font sub-type, space_width criteria (50% of width), encoding, map character-map, font-dictionary. The font-dictionary itself is suitable for the curious. + """ ft: DictionaryObject = obj["/Resources"]["/Font"][font_name] # type: ignore font_subtype, font_halfspace, font_encoding, font_map = build_char_map_from_dict( @@ -49,6 +50,7 @@ def build_char_map_from_dict( Returns: Font sub-type, space_width criteria(50% of width), encoding, map character-map. The font-dictionary itself is suitable for the curious. 
+ """ font_type = cast(str, ft["/Subtype"].get_object()) encoding, map_dict = get_encoding(ft) diff --git a/pypdf/_codecs/_codecs.py b/pypdf/_codecs/_codecs.py index 6798dcdee..9b7fd05b7 100644 --- a/pypdf/_codecs/_codecs.py +++ b/pypdf/_codecs/_codecs.py @@ -23,6 +23,7 @@ def encode(self, data: bytes) -> bytes: Returns: Encoded data. + """ @abstractmethod @@ -35,6 +36,7 @@ def decode(self, data: bytes) -> bytes: Returns: Decoded data. + """ diff --git a/pypdf/_doc_common.py b/pypdf/_doc_common.py index d6316cee5..3baff2024 100644 --- a/pypdf/_doc_common.py +++ b/pypdf/_doc_common.py @@ -342,6 +342,7 @@ def get_num_pages(self) -> int: Raises: PdfReadError: if file is encrypted and restrictions prevent this action. + """ # Flattened pages will not work on an encrypted PDF; # the PDF file's page count is used in this case. Otherwise, @@ -365,6 +366,7 @@ def get_page(self, page_number: int) -> PageObject: Returns: A :class:`PageObject` instance. + """ if self.flattened_pages is None: self._flatten(self._readonly) @@ -468,6 +470,7 @@ def _get_named_destinations( Returns: A dictionary which maps names to :class:`Destinations`. + """ if retval is None: retval = {} @@ -550,6 +553,7 @@ def get_fields( value is a :class:`Field` object. By default, the mapping name is used for keys. ``None`` if form data could not be located. + """ field_attributes = FA.attributes_dict() field_attributes.update(CheckboxRadioButtonAttributes.attributes_dict()) @@ -700,6 +704,7 @@ def get_form_text_fields(self, full_qualified_name: bool = False) -> Dict[str, A If the document contains multiple form fields with the same name, the second and following will get the suffix .2, .3, ... + """ def indexed_key(k: str, fields: Dict[Any, Any]) -> str: @@ -745,6 +750,7 @@ def get_pages_showing_field( - Multi-page list: Field with multiple kids widgets (example: radio buttons, field repeated on multiple pages). 
+ """ def _get_inherited(obj: DictionaryObject, key: str) -> Any: @@ -806,6 +812,7 @@ def open_destination( Raises: Exception: If a destination is invalid. + """ if "/OpenAction" not in self.root_object: return None @@ -917,6 +924,7 @@ def get_page_number(self, page: PageObject) -> Optional[int]: Returns: The page number or None if page is not found + """ return self._get_page_number_by_indirect(page.indirect_reference) @@ -929,6 +937,7 @@ def get_destination_page_number(self, destination: Destination) -> Optional[int] Returns: The page number or None if page is not found + """ return self._get_page_number_by_indirect(destination.page) @@ -1135,6 +1144,7 @@ def _flatten( pages: inherit: indirect_reference: Used recursively to flatten the /Pages object. + """ inheritable_page_attributes = ( NameObject(PG.RESOURCES), @@ -1208,6 +1218,7 @@ def remove_page( clean: replace PageObject with NullObject to prevent annotations or destinations to reference a detached page. + """ if self.flattened_pages is None: self._flatten(self._readonly) @@ -1246,6 +1257,7 @@ def _get_indirect_object(self, num: int, gen: int) -> Optional[PdfObject]: Returns: A PdfObject + """ return IndirectObject(num, gen, self).get_object() @@ -1333,6 +1345,7 @@ def _list_attachments(self) -> List[str]: Returns: list of filenames + """ catalog = self.root_object # From the catalog get the embedded file names @@ -1371,6 +1384,7 @@ def _get_attachments( Returns: dictionary of filename -> Union[bytestring or List[ByteString]] If the filename exists multiple times a list of the different versions will be provided. 
+ """ catalog = self.root_object # From the catalog get the embedded file names diff --git a/pypdf/_encryption.py b/pypdf/_encryption.py index e5cdd9324..8446fec00 100644 --- a/pypdf/_encryption.py +++ b/pypdf/_encryption.py @@ -188,6 +188,7 @@ def compute_key( Returns: The u_hash digest of length key_size + """ a = _padding(password) u_hash = hashlib.md5(a) @@ -243,6 +244,7 @@ def compute_O_value_key(owner_password: bytes, rev: int, key_size: int) -> bytes Returns: The RC4 key + """ a = _padding(owner_password) o_hash_digest = hashlib.md5(a).digest() @@ -266,6 +268,7 @@ def compute_O_value(rc4_key: bytes, user_password: bytes, rev: int) -> bytes: Returns: The RC4 encrypted + """ a = _padding(user_password) rc4_enc = rc4_encrypt(rc4_key, a) @@ -297,6 +300,7 @@ def compute_U_value(key: bytes, rev: int, id1_entry: bytes) -> bytes: Returns: The value + """ if rev <= 2: value = rc4_encrypt(key, _PADDING) @@ -381,6 +385,7 @@ def verify_user_password( Returns: The key + """ key = AlgV4.compute_key( user_password, rev, key_size, o_entry, P, id1_entry, metadata_encrypted @@ -443,6 +448,7 @@ def verify_owner_password( Returns: bytes + """ rc4_key = AlgV4.compute_O_value_key(owner_password, rev, key_size) @@ -526,6 +532,7 @@ def verify_owner_password( Returns: The key + """ password = password[:127] if ( @@ -556,6 +563,7 @@ def verify_user_password( Returns: bytes + """ password = password[:127] if AlgV5.calculate_hash(R, password, u_value[32:40], b"") != u_value[:32]: @@ -605,6 +613,7 @@ def verify_perms( Returns: A boolean + """ b8 = b"T" if metadata_encrypted else b"F" p1 = struct.pack(" Tuple[bytes, bytes]: Returns: A tuple (u-value, ue value) + """ random_bytes = secrets.token_bytes(16) val_salt = random_bytes[:8] @@ -702,6 +712,7 @@ def compute_O_value( Returns: A tuple (O value, OE value) + """ random_bytes = secrets.token_bytes(16) val_salt = random_bytes[:8] @@ -745,6 +756,7 @@ def compute_Perms_value(key: bytes, p: int, metadata_encrypted: bool) -> bytes: Returns: 
The perms value + """ b8 = b"T" if metadata_encrypted else b"F" rr = secrets.token_bytes(4) @@ -798,6 +810,7 @@ class Encryption: encrypting embedded file streams that do not have their own crypt filter specifier. values: Additional encryption parameters. + """ def __init__( diff --git a/pypdf/_page.py b/pypdf/_page.py index 11507de96..a6f8abaeb 100644 --- a/pypdf/_page.py +++ b/pypdf/_page.py @@ -164,6 +164,7 @@ class Transformation: >>> from pypdf import Transformation >>> op = Transformation().scale(sx=2, sy=3).translate(tx=10, ty=20) >>> page.add_transformation(op) + """ # 9.5.4 Coordinate Systems for 3D @@ -194,6 +195,7 @@ def compress(matrix: TransformationMatrixType) -> CompressedTransformationMatrix Returns: A tuple representing the transformation matrix as (a, b, c, d, e, f) + """ return ( matrix[0][0], @@ -219,6 +221,7 @@ def transform(self, m: "Transformation") -> "Transformation": >>> op = Transformation((1, 0, 0, -1, 0, height)) # vertical mirror >>> op = Transformation().transform(Transformation((-1, 0, 0, 1, iwidth, 0))) # horizontal mirror >>> page.add_transformation(op) + """ ctm = Transformation.compress(matrix_multiply(self.matrix, m.matrix)) return Transformation(ctm) @@ -233,6 +236,7 @@ def translate(self, tx: float = 0, ty: float = 0) -> "Transformation": Returns: A new ``Transformation`` instance + """ m = self.ctm return Transformation(ctm=(m[0], m[1], m[2], m[3], m[4] + tx, m[5] + ty)) @@ -252,6 +256,7 @@ def scale( Returns: A new Transformation instance with the scaled matrix. + """ if sx is None and sy is None: raise ValueError("Either sx or sy must be specified") @@ -274,6 +279,7 @@ def rotate(self, rotation: float) -> "Transformation": Returns: A new ``Transformation`` instance with the rotated matrix. 
+ """ rotation = math.radians(rotation) op: TransformationMatrixType = ( @@ -310,6 +316,7 @@ def apply_on( Returns: A tuple or list representing the transformed point in the form (x', y') + """ typ = FloatObject if as_object else float pt1 = ( @@ -365,6 +372,7 @@ def replace(self, new_image: Image, **kwargs: Any) -> None: It is not allowed for inline images or images within a PdfReader. The `kwargs` parameter allows passing additional parameters to `Image.save()`, such as quality. + """ if pil_not_imported: raise ImportError( @@ -487,6 +495,7 @@ class PageObject(DictionaryObject): pdf: PDF file the page belongs to. indirect_reference: Stores the original indirect reference to this object in its source PDF + """ original_page: "PageObject" # very local use in writer when appending @@ -515,6 +524,7 @@ def hash_bin(self) -> int: Returns: Hash considering type and value. + """ return hash( (DictionaryObject, tuple(((k, v.hash_bin()) for k, v in self.items()))) @@ -561,6 +571,7 @@ def create_blank_page( Raises: PageSizeNotDefinedError: if ``pdf`` is ``None`` or contains no page + """ page = PageObject(pdf) @@ -693,6 +704,7 @@ def images(self) -> VirtualListImages: Inline images are extracted and named ~0~, ~1~, ..., with the indirect_reference set to None. + """ return VirtualListImages(self._get_ids_image, self._get_image) @@ -881,6 +893,7 @@ def rotate(self, angle: int) -> "PageObject": Returns: The rotated PageObject + """ if angle % 90 != 0: raise ValueError("Rotation angle must be a multiple of 90") @@ -916,6 +929,7 @@ def compute_unique_key(base_key: str) -> Tuple[str, bool]: A tuple (computed key, bool) where the boolean indicates if there is a resource of the given computed_key with the same value. + """ value = page2res.raw_get(base_key) # TODO : possible improvement : in case of writer, the indirect_reference @@ -1038,6 +1052,7 @@ def get_contents(self) -> Optional[ContentStream]: Returns: The ``/Contents`` object, or ``None`` if it does not exist. 
``/Contents`` is optional, as described in §7.7.3.3 of the PDF Reference. + """ if PG.CONTENTS in self: try: @@ -1130,6 +1145,7 @@ def merge_page( over: set the page2 content over page1 if True (default) else under expand: If True, the current page dimensions will be expanded to accommodate the dimensions of the page to be merged. + """ self._merge_page(page2, over=over, expand=expand) @@ -1445,6 +1461,7 @@ def merge_transformed_page( over: set the page2 content over page1 if True (default) else under expand: Whether the page should be expanded to fit the dimensions of the page to be merged. + """ if isinstance(ctm, Transformation): ctm = ctm.ctm @@ -1471,6 +1488,7 @@ def merge_scaled_page( over: set the page2 content over page1 if True (default) else under expand: Whether the page should be expanded to fit the dimensions of the page to be merged. + """ op = Transformation().scale(scale, scale) self.merge_transformed_page(page2, op, over, expand) @@ -1492,6 +1510,7 @@ def merge_rotated_page( over: set the page2 content over page1 if True (default) else under expand: Whether the page should be expanded to fit the dimensions of the page to be merged. + """ op = Transformation().rotate(rotation) self.merge_transformed_page(page2, op, over, expand) @@ -1515,6 +1534,7 @@ def merge_translated_page( over: set the page2 content over page1 if True (default) else under expand: Whether the page should be expanded to fit the dimensions of the page to be merged. + """ op = Transformation().translate(tx, ty) self.merge_transformed_page(page2, op, over, expand) @@ -1534,6 +1554,7 @@ def add_transformation( object can be passed. See :doc:`/user/cropping-and-transforming`. + """ if isinstance(ctm, Transformation): ctm = ctm.ctm @@ -1582,6 +1603,7 @@ def scale(self, sx: float, sy: float) -> None: Args: sx: The scaling factor on horizontal axis. sy: The scaling factor on vertical axis. 
+ """ self.add_transformation((sx, 0, 0, sy, 0, 0)) self.cropbox = self.cropbox.scale(sx, sy) @@ -1631,6 +1653,7 @@ def scale_by(self, factor: float) -> None: Args: factor: The scaling factor (for both X and Y axis). + """ self.scale(factor, factor) @@ -1642,6 +1665,7 @@ def scale_to(self, width: float, height: float) -> None: Args: width: The new width. height: The new height. + """ sx = width / float(self.mediabox.width) sy = height / float(self.mediabox.height) @@ -1677,6 +1701,7 @@ def page_number(self) -> Optional[int]: Returns: int : page number; None if the page is not attached to a PDF. + """ if self.indirect_reference is None: return None @@ -1815,6 +1840,7 @@ def _extract_text( content_key: indicate the default key where to extract data None = the object; this allow to reuse the function on XObject default = "/Content" + """ text: str = "" output: str = "" @@ -2148,6 +2174,7 @@ def _layout_mode_fonts(self) -> Dict[str, _layout_mode.Font]: Returns: Dict[str, Font]: dictionary of _layout_mode.Font instances keyed by font name + """ # Font retrieval logic adapted from pypdf.PageObject._extract_text() objr: Any = self @@ -2206,6 +2233,7 @@ def _layout_mode_text( Returns: str: multiline string containing page text in a fixed width format that closely adheres to the rendered layout in the source pdf. 
+ """ fonts = self._layout_mode_fonts() if debug_path: # pragma: no cover @@ -2310,6 +2338,7 @@ def extract_text( Returns: The extracted text + """ if extraction_mode not in ["plain", "layout"]: raise ValueError(f"Invalid text extraction mode '{extraction_mode}'") @@ -2328,7 +2357,7 @@ def extract_text( space_vertically=kwargs.get("layout_mode_space_vertically", True), scale_weight=kwargs.get("layout_mode_scale_weight", 1.25), strip_rotated=kwargs.get("layout_mode_strip_rotated", True), - debug_path=kwargs.get("layout_mode_debug_path", None), + debug_path=kwargs.get("layout_mode_debug_path"), ) if len(args) >= 1: if isinstance(args[0], str): @@ -2388,6 +2417,7 @@ def extract_xform_text( Returns: The extracted text + """ return self._extract_text( xform, @@ -2406,6 +2436,7 @@ def _get_fonts(self) -> Tuple[Set[str], Set[str]]: Returns: A tuple (Set of embedded fonts, set of unembedded fonts) + """ obj = self.get_object() assert isinstance(obj, DictionaryObject) @@ -2585,6 +2616,7 @@ def _get_fonts_walk( embedded. We create and add to two sets, fnt = fonts used and emb = fonts embedded. + """ fontkeys = ("/FontFile", "/FontFile2", "/FontFile3") diff --git a/pypdf/_page_labels.py b/pypdf/_page_labels.py index 650b324c1..2d025d614 100644 --- a/pypdf/_page_labels.py +++ b/pypdf/_page_labels.py @@ -169,6 +169,7 @@ def index2label(reader: PdfCommonDocProtocol, index: int) -> str: Returns: The label of the page, e.g. "iv" or "4". 
+ """ root = cast(DictionaryObject, reader.root_object) if "/PageLabels" not in root: @@ -221,6 +222,7 @@ def nums_insert( key: number key of the entry value: value of the entry nums: Nums array to modify + """ if len(nums) % 2 != 0: raise ValueError("A nums like array must have an even number of elements") @@ -250,6 +252,7 @@ def nums_clear_range( key: number key of the entry before the range page_index_to: The page index of the upper limit of the range nums: Nums array to modify + """ if len(nums) % 2 != 0: raise ValueError("A nums like array must have an even number of elements") @@ -274,6 +277,7 @@ def nums_next( Args: key: number key of the entry nums: Nums array + """ if len(nums) % 2 != 0: raise ValueError("A nums like array must have an even number of elements") diff --git a/pypdf/_reader.py b/pypdf/_reader.py index 7137734c8..fa42441eb 100644 --- a/pypdf/_reader.py +++ b/pypdf/_reader.py @@ -105,6 +105,7 @@ class PdfReader(PdfDocCommon): password: Decrypt PDF file at initialization. If the password is None, the file will not be decrypted. Defaults to ``None``. + """ def __init__( @@ -236,6 +237,7 @@ def _info(self) -> Optional[DictionaryObject]: Returns: /Info Dictionary; None if the entry does not exist + """ info = self.trailer.get(TK.INFO, None) if is_null_or_none(info): @@ -255,6 +257,7 @@ def _ID(self) -> Optional[ArrayObject]: Returns: /ID array; None if the entry does not exist + """ id = self.trailer.get(TK.ID, None) return None if is_null_or_none(id) else cast(ArrayObject, id.get_object()) @@ -324,6 +327,7 @@ def _get_page_number_by_indirect( Returns: Page number or None. + """ if self._page_id2num is None: self._page_id2num = { @@ -598,6 +602,7 @@ def read(self, stream: StreamType) -> None: Args: stream: The PDF file stream. 
+ """ self._basic_validation(stream) self._find_eof_marker(stream) @@ -719,6 +724,7 @@ def _find_startxref_pos(self, stream: StreamType) -> int: Returns: The bytes offset + """ line = read_previous_line(stream) try: @@ -1026,6 +1032,7 @@ def _get_xref_issues(stream: StreamType, startxref: int) -> int: Returns: 0 means no issue, other values represent specific issues. + """ stream.seek(startxref - 1, 0) # -1 to check character before line = stream.read(1) @@ -1160,6 +1167,7 @@ def decrypt(self, password: Union[str, bytes]) -> PasswordType: Returns: An indicator if the document was decrypted and whether it was the owner password or the user password. + """ if not self._encryption: raise PdfReadError("Not encrypted file") @@ -1185,6 +1193,7 @@ def add_form_topname(self, name: str) -> Optional[DictionaryObject]: Returns: The created object. ``None`` means no object was created. + """ catalog = self.root_object @@ -1227,6 +1236,7 @@ def rename_form_topname(self, name: str) -> Optional[DictionaryObject]: Returns: The modified object. ``None`` means no object was modified. + """ catalog = self.root_object diff --git a/pypdf/_text_extraction/__init__.py b/pypdf/_text_extraction/__init__.py index 72d492f6a..66be6ff47 100644 --- a/pypdf/_text_extraction/__init__.py +++ b/pypdf/_text_extraction/__init__.py @@ -47,6 +47,7 @@ def set_custom_rtl( Returns: A tuple containing the new values for ``CUSTOM_RTL_MIN``, ``CUSTOM_RTL_MAX``, and ``CUSTOM_RTL_SPECIAL_CHARS``. 
+ """ global CUSTOM_RTL_MIN, CUSTOM_RTL_MAX, CUSTOM_RTL_SPECIAL_CHARS if isinstance(_min, int): diff --git a/pypdf/_text_extraction/_layout_mode/_fixed_width_page.py b/pypdf/_text_extraction/_layout_mode/_fixed_width_page.py index e7af1b234..80dc2ed26 100644 --- a/pypdf/_text_extraction/_layout_mode/_fixed_width_page.py +++ b/pypdf/_text_extraction/_layout_mode/_fixed_width_page.py @@ -46,6 +46,7 @@ def bt_group(tj_op: TextStateParams, rendered_text: str, dispaced_tx: float) -> tj_op (TextStateParams): TextStateParams instance rendered_text (str): rendered text dispaced_tx (float): x coordinate of last character in BTGroup + """ return BTGroup( tx=tj_op.tx, @@ -77,6 +78,7 @@ def recurs_to_target_op( Returns: tuple: list of BTGroup dicts + list of TextStateParams dataclass instances. + """ # 1 entry per line of text rendered within each BT/ET operation. bt_groups: List[BTGroup] = [] @@ -210,6 +212,7 @@ def y_coordinate_groups( Returns: Dict[int, List[BTGroup]]: dict of lists of text rendered by each BT operator keyed by y coordinate + """ ty_groups = { ty: sorted(grp, key=lambda x: x["tx"]) @@ -261,6 +264,7 @@ def text_show_operations( Returns: List[BTGroup]: list of dicts of text rendered by each BT operator + """ state_mgr = TextStateManager() # transformation stack manager debug = bool(debug_path) @@ -329,6 +333,7 @@ def fixed_char_width(bt_groups: List[BTGroup], scale_weight: float = 1.25) -> fl Returns: float: fixed character width + """ char_widths = [] for _bt in bt_groups: @@ -351,6 +356,7 @@ def fixed_width_page( Returns: str: page text in a fixed width format that closely adheres to the rendered layout in the source pdf. 
+ """ lines: List[str] = [] last_y_coord = 0 diff --git a/pypdf/_text_extraction/_layout_mode/_font.py b/pypdf/_text_extraction/_layout_mode/_font.py index 1d9617d74..4a9b27cad 100644 --- a/pypdf/_text_extraction/_layout_mode/_font.py +++ b/pypdf/_text_extraction/_layout_mode/_font.py @@ -19,6 +19,7 @@ class Font: encoding (str | Dict[int, str]): font encoding char_map (dict): character map font_dictionary (dict): font dictionary + """ subtype: str diff --git a/pypdf/_text_extraction/_layout_mode/_text_state_manager.py b/pypdf/_text_extraction/_layout_mode/_text_state_manager.py index 3c5d47367..8851e1a94 100644 --- a/pypdf/_text_extraction/_layout_mode/_text_state_manager.py +++ b/pypdf/_text_extraction/_layout_mode/_text_state_manager.py @@ -29,6 +29,7 @@ class TextStateManager: Ts (float): text rise font (Font): font object font_size (int | float): font size + """ def __init__(self) -> None: @@ -54,6 +55,7 @@ def set_state_param(self, op: bytes, value: Union[float, List[Any]]) -> None: for unsupported operators (see supported operators above). value (float | List[Any]): new parameter value. If a list, value[0] is used. 
+ """ if op not in [b"Tc", b"Tz", b"Tw", b"TL", b"Ts"]: return @@ -66,6 +68,7 @@ def set_font(self, font: Font, size: float) -> None: Args: font (Font): a layout mode Font size (float): font size + """ self.font = font self.font_size = size @@ -83,6 +86,7 @@ def text_state_params(self, value: Union[bytes, str] = "") -> TextStateParams: Returns: TextStateParams: current text state parameters + """ if not isinstance(self.font, Font): raise PdfReadError( diff --git a/pypdf/_text_extraction/_layout_mode/_text_state_params.py b/pypdf/_text_extraction/_layout_mode/_text_state_params.py index b6e6930ca..843fc0651 100644 --- a/pypdf/_text_extraction/_layout_mode/_text_state_params.py +++ b/pypdf/_text_extraction/_layout_mode/_text_state_params.py @@ -31,6 +31,7 @@ class TextStateParams: font_height (float): effective font height accounting for CTM flip_vertical (bool): True if y axis has been inverted (i.e. if self.transform[3] < 0.) rotated (bool): True if the text orientation is rotated with respect to the page. + """ txt: str @@ -109,6 +110,7 @@ def displacement_matrix( word (str, optional): Defaults to None in which case self.txt displacement is returned. TD_offset (float, optional): translation applied by TD operator. Defaults to 0.0. + """ word = word if word is not None else self.txt return [1.0, 0.0, 0.0, 1.0, self.word_tx(word, TD_offset), 0.0] diff --git a/pypdf/_utils.py b/pypdf/_utils.py index b599f2efc..fcc749908 100644 --- a/pypdf/_utils.py +++ b/pypdf/_utils.py @@ -136,6 +136,7 @@ def read_until_whitespace(stream: StreamType, maxchars: Optional[int] = None) -> Returns: The data which was read. + """ txt = b"" while True: @@ -157,6 +158,7 @@ def read_non_whitespace(stream: StreamType) -> bytes: Returns: The data which was read. + """ tok = stream.read(1) while tok in WHITESPACES: @@ -174,6 +176,7 @@ def skip_over_whitespace(stream: StreamType) -> bool: Returns: True if more than one whitespace was skipped, otherwise return False. 
+ """ tok = WHITESPACES[0] cnt = 0 @@ -192,6 +195,7 @@ def check_if_whitespace_only(value: bytes) -> bool: Returns: True if the value only has whitespace characters, otherwise return False. + """ for index in range(len(value)): current = value[index : index + 1] @@ -220,6 +224,7 @@ def read_until_regex(stream: StreamType, regex: Pattern[bytes]) -> bytes: Returns: The read bytes. + """ name = b"" while True: @@ -248,6 +253,7 @@ def read_block_backwards(stream: StreamType, to_read: int) -> bytes: Returns: The data which was read. + """ if stream.tell() < to_read: raise PdfStreamError("Could not read malformed PDF file") @@ -274,6 +280,7 @@ def read_previous_line(stream: StreamType) -> bytes: Returns: The data which was read. + """ line_content = [] found_crlf = False @@ -438,6 +445,7 @@ def rename_kwargs( kwargs: aliases: fail: + """ for old_term, new_term in aliases.items(): if old_term in kwargs: diff --git a/pypdf/_writer.py b/pypdf/_writer.py index 0ac1524bc..01a0ea6cb 100644 --- a/pypdf/_writer.py +++ b/pypdf/_writer.py @@ -162,6 +162,7 @@ class PdfWriter(PdfDocCommon): full: If true, loads all the objects (always full if incremental = True). This parameters may allows to load very big PDFs. + """ def __init__( @@ -304,6 +305,7 @@ def root_object(self) -> DictionaryObject: Note: Recommended only for read access. + """ return self._root_object @@ -314,6 +316,7 @@ def _info(self) -> Optional[DictionaryObject]: Returns: /Info Dictionary; None if the entry does not exist + """ return ( None @@ -527,6 +530,7 @@ def set_need_appearances_writer(self, state: bool = True) -> None: Returns: None + """ # See §12.7.2 and §7.7.2 for more information: # https://opensource.adobe.com/dc-acrobat-sdk-docs/pdfstandards/PDF32000_2008.pdf @@ -573,6 +577,7 @@ def add_page( Returns: The added PageObject. 
+ """ assert self.flattened_pages is not None, "mypy" return self._add_page(page, len(self.flattened_pages), excluded_keys) @@ -594,6 +599,7 @@ def insert_page( Returns: The added PageObject. + """ assert self.flattened_pages is not None, "mypy" if index < 0: @@ -616,6 +622,7 @@ def _get_page_number_by_indirect( Returns: The page number or None + """ # to provide same function as in PdfReader if is_null_or_none(indirect_reference): @@ -648,6 +655,7 @@ def add_blank_page( Raises: PageSizeNotDefinedError: if width and height are not defined and previous page does not exist. + """ page = PageObject.create_blank_page(self, width, height) return self.add_page(page) @@ -676,6 +684,7 @@ def insert_blank_page( Raises: PageSizeNotDefinedError: if width and height are not defined and previous page does not exist. + """ if width is None or height is None and index < self.get_num_pages(): oldpage = self.pages[index] @@ -720,6 +729,7 @@ def add_js(self, javascript: str) -> None: >>> output.add_js("this.print({bUI:true,bSilent:false,bShrinkToFit:true});") # Example: This will launch the print window when the PDF is opened. + """ # Names / JavaScript preferred to be able to add multiple scripts if "/Names" not in self._root_object: @@ -757,6 +767,7 @@ def add_attachment(self, filename: str, data: Union[str, bytes]) -> None: Args: filename: The filename to display. data: The data in the file. + """ # We need three entries: # * The file's data @@ -857,6 +868,7 @@ def append_pages_from_reader( (delegates to append_pages_from_reader). The single parameter of the callback is a reference to the page just appended to the document. + """ # Get page count from writer and reader reader_num_pages = len(reader.pages) @@ -1049,6 +1061,7 @@ def update_page_form_field_values( auto_regenerate: Set/unset the need_appearances flag; the flag is unchanged if auto_regenerate is None. 
+ """ if CatalogDictionary.ACRO_FORM not in self._root_object: raise PyPdfError("No /AcroForm dictionary in PdfWriter Object") @@ -1138,6 +1151,7 @@ def reattach_fields( Returns: list of reattached fields. + """ lst = [] if page is None: @@ -1183,6 +1197,7 @@ def clone_reader_document_root(self, reader: PdfReader) -> None: Args: reader: PdfReader from which the document root should be copied. + """ self._info_obj = None if self.incremental: @@ -1231,6 +1246,7 @@ def clone_document_from_reader( (delegates to append_pages_from_reader). The single parameter of the callback is a reference to the page just appended to the document. + """ self.clone_reader_document_root(reader) inf = reader._info @@ -1317,6 +1333,7 @@ def encrypt( algorithm: encrypt algorithm. Values may be one of "RC4-40", "RC4-128", "AES-128", "AES-256-R5", "AES-256". If it is valid, `use_128bit` will be ignored. + """ if owner_password is None: owner_password = user_password @@ -1380,6 +1397,7 @@ def write(self, stream: Union[Path, StrByteType]) -> Tuple[bool, IO[Any]]: Returns: A tuple (bool, IO). + """ my_file = False @@ -1388,7 +1406,7 @@ def write(self, stream: Union[Path, StrByteType]) -> Tuple[bool, IO[Any]]: if isinstance(stream, (str, Path)): stream = FileIO(stream, "wb") - self.with_as_usage = True # + self.with_as_usage = True my_file = True self.write_stream(stream) @@ -1407,6 +1425,7 @@ def list_objects_in_increment(self) -> List[IndirectObject]: Returns: List of (new / modified) IndirectObjects + """ return [ cast(IndirectObject, self._objects[i]).indirect_reference @@ -1564,6 +1583,7 @@ def metadata(self) -> Optional[DocumentInformation]: Note that some PDF files use (xmp)metadata streams instead of document information dictionaries, and these metadata streams will not be accessed by this function. 
+ """ return super().metadata @@ -1588,6 +1608,7 @@ def add_metadata(self, infos: Dict[str, Any]) -> None: Args: infos: a Python dictionary where each key is a field and each value is your new metadata. + """ args = {} if isinstance(infos, PdfObject): @@ -1612,6 +1633,7 @@ def compress_identical_objects( Args: remove_identicals: Remove identical objects. remove_orphans: Remove unreferenced objects. + """ def replace_in_obj( @@ -1701,6 +1723,7 @@ def _sweep_indirect_references( Args: root: The root of the PDF object tree to sweep. + """ deprecate( "_sweep_indirect_references has been removed, please report to dev team if this warning is observed", @@ -1732,6 +1755,7 @@ def _resolve_indirect_object( Raises: ValueError: If the input stream is closed. + """ deprecate( "_resolve_indirect_object has been removed, please report to dev team if this warning is observed", @@ -1772,6 +1796,7 @@ def get_threads_root(self) -> ArrayObject: Returns: An array (possibly empty) of Dictionaries with ``/F`` and ``/I`` properties. + """ if CO.THREADS in self._root_object: # Table 3.25 Entries in the catalog dictionary @@ -1882,6 +1907,7 @@ def add_outline_item( Returns: The added outline item as an indirect object. + """ page_ref: Union[None, NullObject, IndirectObject, NumberObject] if isinstance(italic, Fit): # it means that we are on the old params @@ -2000,6 +2026,7 @@ def remove_annotations( Examples are: "/Link", "/FileAttachment", "/Sound", "/Movie", "/Screen", ... If you want to remove all annotations, use subtypes=None. + """ for page in self.pages: self._remove_annots_from_page(page, subtypes) @@ -2034,6 +2061,7 @@ def remove_objects_from_page( page: Page object to clean up. 
to_delete: Objects to be deleted; can be a ``ObjectDeletionFlag`` or a list of ObjectDeletionFlag + """ if isinstance(to_delete, (list, tuple)): for to_d in to_delete: @@ -2177,6 +2205,7 @@ def remove_images( Args: to_delete : The type of images to be deleted (default = all images types) + """ if isinstance(to_delete, bool): to_delete = ImageType.ALL @@ -2225,6 +2254,7 @@ def add_uri( border: if provided, an array describing border-drawing properties. See the PDF spec for details. No border will be drawn if this argument is omitted. + """ page_link = self.get_object(self._pages)[PA.KIDS][page_number] # type: ignore page_ref = cast(Dict[str, Any], self.get_object(page_link)) @@ -2311,6 +2341,7 @@ def _set_page_layout(self, layout: Union[NameObject, LayoutType]) -> None: - Show two pages at a time, odd-numbered pages on the left * - /TwoPageRight - Show two pages at a time, odd-numbered pages on the right + """ if not isinstance(layout, NameObject): if layout not in self._valid_layouts: @@ -2345,6 +2376,7 @@ def set_page_layout(self, layout: LayoutType) -> None: - Show two pages at a time, odd-numbered pages on the left * - /TwoPageRight - Show two pages at a time, odd-numbered pages on the right + """ self._set_page_layout(layout) @@ -2444,6 +2476,7 @@ def add_annotation( Returns: The inserted object. This can be used for popup creation, for example. 
+ """ page = page_number if isinstance(page, int): @@ -2491,6 +2524,7 @@ def clean_page(self, page: Union[PageObject, IndirectObject]) -> PageObject: Returns: The cleaned PageObject + """ page = cast("PageObject", page.get_object()) for a in page.get("/Annots", []): @@ -2582,6 +2616,7 @@ def append( excluded_fields: Provide the list of fields/keys to be ignored if ``/Annots`` is part of the list, the annotation will be ignored if ``/B`` is part of the list, the articles will be ignored + """ if excluded_fields is None: excluded_fields = () @@ -2645,6 +2680,7 @@ def merge( Raises: TypeError: The pages attribute is not configured properly + """ if isinstance(fileobj, PdfDocCommon): reader = fileobj @@ -2814,6 +2850,7 @@ def _add_articles_thread( Returns: The added thread as an indirect reference + """ nthread = thread.clone( self, force_duplicate=True, ignore_fields=("/F",) @@ -2877,6 +2914,7 @@ def add_filtered_articles( fltr: pages: reader: + """ if isinstance(fltr, str): fltr = re.compile(fltr) @@ -2978,6 +3016,7 @@ def _get_filtered_outline( Returns: A list of destination objects. + """ new_outline = [] if node is None: @@ -3103,6 +3142,7 @@ def reset_translation( Args: reader: PdfReader or IndirectObject referencing a PdfReader object. if set to None or omitted, all tables will be reset. + """ if reader is None: self._id_translated = {} @@ -3153,6 +3193,7 @@ def set_page_label( Subsequent pages are numbered sequentially from this value, which must be greater than or equal to 1. Default value: 1. + """ if style is None and prefix is None: raise ValueError("At least one of style and prefix must be given") @@ -3200,6 +3241,7 @@ def _set_page_label( in the range. Subsequent pages are numbered sequentially from this value, which must be greater than or equal to 1. Default value: 1. 
+ """ default_page_label = DictionaryObject() default_page_label[NameObject("/S")] = NameObject("/D") diff --git a/pypdf/_xobj_image_helpers.py b/pypdf/_xobj_image_helpers.py index 353b8181c..5687b3233 100644 --- a/pypdf/_xobj_image_helpers.py +++ b/pypdf/_xobj_image_helpers.py @@ -48,6 +48,7 @@ def _get_imagemode( Returns Image mode not taking into account mask(transparency) ColorInversion is required (like for some DeviceCMYK) + """ if depth > MAX_IMAGE_MODE_NESTING_DEPTH: raise PdfReadError( diff --git a/pypdf/annotations/_markup_annotations.py b/pypdf/annotations/_markup_annotations.py index 5d7da5659..ea5c641ed 100644 --- a/pypdf/annotations/_markup_annotations.py +++ b/pypdf/annotations/_markup_annotations.py @@ -46,6 +46,7 @@ class MarkupAnnotation(AnnotationDictionary, ABC): Args: title_bar: Text to be displayed in the title bar of the annotation; by convention this is the name of the author + """ def __init__(self, *, title_bar: Optional[str] = None): @@ -63,6 +64,7 @@ class Text(MarkupAnnotation): text: The text that is added to the document open: flags: + """ def __init__( diff --git a/pypdf/constants.py b/pypdf/constants.py index 6ce1e391b..d90d5e0d3 100644 --- a/pypdf/constants.py +++ b/pypdf/constants.py @@ -474,6 +474,7 @@ def attributes(cls) -> Tuple[str, ...]: Returns: A tuple containing all the attribute constants. + """ return ( cls.TM, @@ -501,6 +502,7 @@ def attributes_dict(cls) -> Dict[str, str]: Returns: A dictionary containing attribute keys and their names. + """ return { cls.FT: "Field Type", @@ -531,6 +533,7 @@ def attributes(cls) -> Tuple[str, ...]: Returns: A tuple containing all the attribute constants. + """ return (cls.Opt,) @@ -547,6 +550,7 @@ def attributes_dict(cls) -> Dict[str, str]: Returns: A dictionary containing attribute keys and their names. 
+ """ return { cls.Opt: "Options", diff --git a/pypdf/filters.py b/pypdf/filters.py index 67265f7e5..517d6aac3 100644 --- a/pypdf/filters.py +++ b/pypdf/filters.py @@ -76,6 +76,7 @@ def decompress(data: bytes) -> bytes: Returns: The decompressed data. + """ try: return zlib.decompress(data) @@ -115,6 +116,7 @@ def decode( Raises: PdfReadError: + """ if isinstance(decode_parms, ArrayObject): raise DeprecationError("decode_parms as ArrayObject is deprecated") @@ -244,6 +246,7 @@ def encode(data: bytes, level: int = -1) -> bytes: Returns: The compressed data. + """ return zlib.compress(data, level) @@ -275,6 +278,7 @@ def decode( Raises: PdfStreamError: + """ # decode_parms is unused here @@ -336,6 +340,7 @@ def decode( Raises: PdfStreamError: + """ # decode_parms is unused here @@ -391,6 +396,7 @@ def _decodeb( Returns: decoded data. + """ # decode_parms is unused here return LZWDecode.Decoder(data).decode() @@ -410,6 +416,7 @@ def decode( Returns: decoded data. + """ # decode_parms is unused here deprecate("LZWDecode.decode will return bytes instead of str in pypdf 6.0.0") @@ -434,6 +441,7 @@ def decode( Returns: decoded data. + """ if isinstance(data, str): data = data.encode() @@ -601,6 +609,7 @@ def decode_stream_data(stream: Any) -> bytes: # utils.StreamObject Raises: NotImplementedError: If an unsupported filter type is encountered. + """ filters = stream.get(SA.FILTER, ()) if isinstance(filters, IndirectObject): @@ -656,6 +665,7 @@ def _xobj_to_image(x_object_obj: Dict[str, Any]) -> Tuple[Optional[str], bytes, Returns: Tuple[file extension, bytes, PIL.Image.Image] + """ from ._xobj_image_helpers import ( Image, diff --git a/pypdf/generic/_base.py b/pypdf/generic/_base.py index 7e523c7dd..21aa558fe 100644 --- a/pypdf/generic/_base.py +++ b/pypdf/generic/_base.py @@ -65,6 +65,7 @@ def hash_bin(self) -> int: Returns: Hash considering type and value. 
+ """ raise NotImplementedError( f"{self.__class__.__name__} does not implement .hash_bin() so far" @@ -95,6 +96,7 @@ def replicate( Returns: The cloned PdfObject + """ return self.clone(pdf_dest) @@ -123,6 +125,7 @@ def clone( Returns: The cloned PdfObject + """ raise NotImplementedError( f"{self.__class__.__name__} does not implement .clone so far" @@ -143,6 +146,7 @@ def _reference_clone( Returns: The clone + """ try: if not force_duplicate and clone.indirect_reference.pdf == pdf_dest: @@ -213,6 +217,7 @@ def hash_bin(self) -> int: Returns: Hash considering type and value. + """ return hash((self.__class__,)) @@ -258,6 +263,7 @@ def hash_bin(self) -> int: Returns: Hash considering type and value. + """ return hash((self.__class__, self.value)) @@ -311,6 +317,7 @@ def hash_bin(self) -> int: Returns: Hash considering type and value. + """ return hash((self.__class__, self.idnum, self.generation, id(self.pdf))) @@ -484,6 +491,7 @@ def hash_bin(self) -> int: Returns: Hash considering type and value. + """ return hash((self.__class__, self.as_numeric)) @@ -538,6 +546,7 @@ def hash_bin(self) -> int: Returns: Hash considering type and value. + """ return hash((self.__class__, self.as_numeric())) @@ -590,6 +599,7 @@ def hash_bin(self) -> int: Returns: Hash considering type and value. + """ return hash((self.__class__, bytes(self))) @@ -678,6 +688,7 @@ def hash_bin(self) -> int: Returns: Hash considering type and value. + """ return hash((self.__class__, self.original_bytes)) @@ -783,6 +794,7 @@ def hash_bin(self) -> int: Returns: Hash considering type and value. + """ return hash((self.__class__, self)) @@ -877,6 +889,7 @@ def is_null_or_none(x: Any) -> TypeGuard[Union[None, NullObject, IndirectObject] """ Returns: True if x is None or NullObject. 
+ """ return x is None or ( isinstance(x, PdfObject) diff --git a/pypdf/generic/_data_structures.py b/pypdf/generic/_data_structures.py index 3b103c009..c648eeccc 100644 --- a/pypdf/generic/_data_structures.py +++ b/pypdf/generic/_data_structures.py @@ -154,6 +154,7 @@ def hash_bin(self) -> int: Returns: Hash considering type and value. + """ return hash((self.__class__, tuple(x.hash_bin() for x in self))) @@ -191,6 +192,7 @@ def __add__(self, lst: Any) -> "ArrayObject": Returns: ArrayObject with all elements + """ temp = ArrayObject(self) temp.extend(self._to_lst(lst)) @@ -206,6 +208,7 @@ def __iadd__(self, lst: Any) -> Self: if str is passed it will be converted into TextStringObject or NameObject (if starting with "/") if bytes is passed it will be converted into ByteStringObject + """ self.extend(self._to_lst(lst)) return self @@ -318,6 +321,7 @@ def _clone( pdf_dest: force_duplicate: ignore_fields: + """ # first we remove for the ignore_fields # that are for a limited number of levels @@ -421,6 +425,7 @@ def hash_bin(self) -> int: Returns: Hash considering type and value. + """ return hash( (self.__class__, tuple(((k, v.hash_bin()) for k, v in self.items()))) @@ -441,6 +446,7 @@ def get_inherited(self, key: str, default: Any = None) -> Any: Returns: Current key or inherited one, otherwise default value. + """ if key in self: return self[key] @@ -482,6 +488,7 @@ def xmp_metadata(self) -> Optional[XmpInformationProtocol]: Returns a :class:`~pypdf.xmp.XmpInformation` instance that can be used to access XMP metadata from the document. Can also return None if no metadata was found on the document root. 
+ """ from ..xmp import XmpInformation @@ -797,6 +804,7 @@ def _remove_node_from_tree( prev_ref: cur: last: + """ next_ref = cur.get(NameObject("/Next"), None) if prev is None: @@ -893,6 +901,7 @@ def _reset_node_tree_relationship(child_obj: Any) -> None: Args: child_obj: + """ del child_obj[NameObject("/Parent")] if NameObject("/Next") in child_obj: @@ -947,6 +956,7 @@ def _clone( pdf_dest: force_duplicate: ignore_fields: + """ self._data = cast("StreamObject", src)._data try: @@ -968,6 +978,7 @@ def hash_bin(self) -> int: Returns: Hash considering type and value. + """ # use of _data to prevent errors on non decoded stream such as JBIG2 return hash((super().hash_bin(), self._data)) @@ -1062,6 +1073,7 @@ def decode_as_image(self) -> Any: errors during decoding will be reported It is recommended to catch exceptions to prevent stops in your program. + """ from ..filters import _xobj_to_image @@ -1220,6 +1232,7 @@ def clone( Returns: The cloned ContentStream + """ try: if self.indirect_reference.pdf == pdf_dest and not force_duplicate: # type: ignore @@ -1255,6 +1268,7 @@ def _clone( pdf_dest: force_duplicate: ignore_fields: + """ src_cs = cast("ContentStream", src) super().set_data(src_cs._data) @@ -1579,6 +1593,7 @@ class Destination(TreeObject): Raises: PdfReadError: If destination type is invalid. + """ node: Optional[ diff --git a/pypdf/generic/_fit.py b/pypdf/generic/_fit.py index c44d12b4c..e8f2009a2 100644 --- a/pypdf/generic/_fit.py +++ b/pypdf/generic/_fit.py @@ -38,6 +38,7 @@ def xyz( Returns: The created fit object. + """ return Fit(fit_type="/XYZ", fit_args=(left, top, zoom)) @@ -70,6 +71,7 @@ def fit_horizontally(cls, top: Optional[float] = None) -> "Fit": Returns: The created fit object. + """ return Fit(fit_type="/FitH", fit_args=(top,)) @@ -106,6 +108,7 @@ def fit_rectangle( Returns: The created fit object. 
+ """ return Fit(fit_type="/FitR", fit_args=(left, bottom, right, top)) @@ -138,6 +141,7 @@ def fit_box_horizontally(cls, top: Optional[float] = None) -> "Fit": Returns: The created fit object. + """ return Fit(fit_type="/FitBH", fit_args=(top,)) @@ -157,6 +161,7 @@ def fit_box_vertically(cls, left: Optional[float] = None) -> "Fit": Returns: The created fit object. + """ return Fit(fit_type="/FitBV", fit_args=(left,)) diff --git a/pypdf/generic/_utils.py b/pypdf/generic/_utils.py index 6fce6d0b2..a53b31799 100644 --- a/pypdf/generic/_utils.py +++ b/pypdf/generic/_utils.py @@ -137,6 +137,7 @@ def create_string_object( Raises: TypeError: If string is not of type str or bytes. + """ if isinstance(string, str): return TextStringObject(string) diff --git a/pypdf/pagerange.py b/pypdf/pagerange.py index 47a72c72f..47cf1341e 100644 --- a/pypdf/pagerange.py +++ b/pypdf/pagerange.py @@ -86,6 +86,7 @@ def valid(input: Any) -> bool: Returns: True, if the ``input`` is a valid PageRange. + """ return isinstance(input, (slice, PageRange)) or ( isinstance(input, str) and bool(re.match(PAGE_RANGE_RE, input)) @@ -124,6 +125,7 @@ def indices(self, n: int) -> Tuple[int, int, int]: Returns: Arguments for range(). + """ return self._slice.indices(n) @@ -166,6 +168,7 @@ def parse_filename_page_ranges( Returns: A list of (filename, page_range) pairs. + """ pairs: List[Tuple[str, PageRange]] = [] pdf_filename = None diff --git a/pypdf/xmp.py b/pypdf/xmp.py index 0c4444fc1..099c36976 100644 --- a/pypdf/xmp.py +++ b/pypdf/xmp.py @@ -207,6 +207,7 @@ class XmpInformation(PdfObject): Raises: PdfReadError: if XML is invalid + """ def __init__(self, stream: ContentStream) -> None: @@ -369,6 +370,7 @@ def custom_properties(self) -> Dict[Any, Any]: Returns: A dictionary of key/value items for custom metadata properties. 
+ """ if not hasattr(self, "_custom_properties"): self._custom_properties = {} diff --git a/pyproject.toml b/pyproject.toml index 1dbc3cb05..ba3afeb89 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -113,6 +113,8 @@ exclude_lines = [ [tool.ruff] line-length = 120 + +[tool.ruff.lint] select = ["ALL"] ignore = [ "D404", # First word of the docstring should not be "This" @@ -193,10 +195,10 @@ ignore = [ "PT014", # Duplicate of test case at index 1 in `@pytest_mark.parametrize` ] -[tool.ruff.mccabe] +[tool.ruff.lint.mccabe] max-complexity = 54 # Recommended: 10 -[tool.ruff.per-file-ignores] +[tool.ruff.lint.per-file-ignores] "_cryptography.py" = ["S304", "S305"] # Use of insecure cipher / modes, aka RC4 and AES-ECB "_encryption.py" = ["S324"] "_writer.py" = ["S324"] @@ -208,7 +210,7 @@ max-complexity = 54 # Recommended: 10 "tests/*" = ["S101", "ANN001", "ANN201","D104", "S105", "S106", "D103", "B018", "B017"] "tests/test_workflows.py" = ["T201"] -[tool.ruff.pylint] +[tool.ruff.lint.pylint] allow-magic-value-types = ["bytes", "float", "int", "str"] max-args = 12 # Recommended: 5 max-branches = 36 # Recommended: 12 diff --git a/tests/__init__.py b/tests/__init__.py index 0da6f6548..f82d7331f 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -24,6 +24,7 @@ def get_data_from_url(url: Optional[str] = None, name: Optional[str] = None) -> Returns: Read File as bytes + """ if name is None: raise ValueError("A name must always be specified") @@ -72,6 +73,7 @@ def _strip_position(line: str) -> str: Returns: A line with stripped position + """ line = ".py".join(line.split(".py:")[1:]) line = " ".join(line.split(" ")[1:]) diff --git a/tests/bench.py b/tests/bench.py index bf2eca580..eba7be2a9 100644 --- a/tests/bench.py +++ b/tests/bench.py @@ -220,7 +220,7 @@ def image_new_property(data): assert list(PageObject(None, None).images) == [] -@pytest.mark.enable_socket() +@pytest.mark.enable_socket def test_image_new_property_performance(benchmark): url = 
"https://github.com/py-pdf/pypdf/files/11219022/pdf_font_garbled.pdf" name = "pdf_font_garbled.pdf" @@ -234,7 +234,7 @@ def image_extraction(data): list(reader.pages[0].images) -@pytest.mark.enable_socket() +@pytest.mark.enable_socket def test_large_compressed_image_performance(benchmark): url = "https://github.com/py-pdf/pypdf/files/15306199/file_with_large_compressed_image.pdf" data = BytesIO(get_data_from_url(url, name="file_with_large_compressed_image.pdf")) diff --git a/tests/test_cmap.py b/tests/test_cmap.py index f0432469d..2a83bc3b5 100644 --- a/tests/test_cmap.py +++ b/tests/test_cmap.py @@ -15,8 +15,8 @@ RESOURCE_ROOT = PROJECT_ROOT / "resources" -@pytest.mark.enable_socket() -@pytest.mark.slow() +@pytest.mark.enable_socket +@pytest.mark.slow @pytest.mark.parametrize( ("url", "name", "strict"), [ @@ -52,7 +52,7 @@ def test_text_extraction_slow(caplog, url: str, name: str, strict: bool): assert caplog.text == "" -@pytest.mark.enable_socket() +@pytest.mark.enable_socket @pytest.mark.parametrize( ("url", "name", "strict"), [ @@ -89,7 +89,7 @@ def test_text_extraction_fast(caplog, url: str, name: str, strict: bool): assert caplog.text == "" -@pytest.mark.enable_socket() +@pytest.mark.enable_socket def test_parse_encoding_advanced_encoding_not_implemented(caplog): reader = PdfReader(BytesIO(get_data_from_url(name="tika-957144.pdf"))) for page in reader.pages: @@ -97,14 +97,14 @@ def test_parse_encoding_advanced_encoding_not_implemented(caplog): assert "Advanced encoding /WinAnsEncoding not implemented yet" in caplog.text -@pytest.mark.enable_socket() +@pytest.mark.enable_socket def test_ascii_charset(): # Issue #1312 reader = PdfReader(BytesIO(get_data_from_url(name="ascii charset.pdf"))) assert "/a" not in reader.pages[0].extract_text() -@pytest.mark.enable_socket() +@pytest.mark.enable_socket @pytest.mark.parametrize( ("url", "name", "page_nb", "within_text"), [ @@ -130,14 +130,14 @@ def test_text_extraction_of_specific_pages( assert within_text in 
reader.pages[page_nb].extract_text() -@pytest.mark.enable_socket() +@pytest.mark.enable_socket def test_iss1533(): reader = PdfReader(BytesIO(get_data_from_url(name="iss1533.pdf"))) reader.pages[0].extract_text() # no error assert build_char_map("/F", 200, reader.pages[0])[3]["\x01"] == "Ü" -@pytest.mark.enable_socket() +@pytest.mark.enable_socket @pytest.mark.parametrize( ("url", "name", "page_index", "within_text", "caplog_text"), [ @@ -165,7 +165,7 @@ def test_cmap_encodings(caplog, url, name, page_index, within_text, caplog_text) assert caplog_text in caplog.text -@pytest.mark.enable_socket() +@pytest.mark.enable_socket def test_latex(): reader = PdfReader(BytesIO(get_data_from_url(name="math_latex.pdf"))) txt = reader.pages[0].extract_text() # no error @@ -174,7 +174,7 @@ def test_latex(): # actually the ϕ and φ seems to be crossed in latex -@pytest.mark.enable_socket() +@pytest.mark.enable_socket def test_unixxx_glyphs(): reader = PdfReader(BytesIO(get_data_from_url(name="unixxx_glyphs.pdf"))) txt = reader.pages[0].extract_text() # no error @@ -182,7 +182,7 @@ def test_unixxx_glyphs(): assert pat in txt -@pytest.mark.enable_socket() +@pytest.mark.enable_socket def test_cmap_compute_space_width(): # issue 2137 # original file URL: @@ -193,21 +193,21 @@ def test_cmap_compute_space_width(): reader.pages[0].extract_text() # no error -@pytest.mark.enable_socket() +@pytest.mark.enable_socket def test_tabs_in_cmap(): """Issue #2173""" reader = PdfReader(BytesIO(get_data_from_url(name="iss2173.pdf"))) reader.pages[0].extract_text() -@pytest.mark.enable_socket() +@pytest.mark.enable_socket def test_ignoring_non_put_entries(): """Issue #2290""" reader = PdfReader(BytesIO(get_data_from_url(name="iss2290.pdf"))) reader.pages[0].extract_text() -@pytest.mark.enable_socket() +@pytest.mark.enable_socket def test_eten_b5(): """Issue #2356""" reader = PdfReader(BytesIO(get_data_from_url(name="iss2290.pdf"))) @@ -239,7 +239,7 @@ def test_null_missing_width(): 
page.extract_text() -@pytest.mark.enable_socket() +@pytest.mark.enable_socket def test_unigb_utf16(): """Cf #2812""" url = ( @@ -250,7 +250,7 @@ def test_unigb_utf16(): assert "《中国能源展望 2060(2024 年版)》编写委员会" in reader.pages[1].extract_text() -@pytest.mark.enable_socket() +@pytest.mark.enable_socket def test_too_many_differences(): """Cf #2836""" url = ( diff --git a/tests/test_encryption.py b/tests/test_encryption.py index 07278d37d..80ec56c77 100644 --- a/tests/test_encryption.py +++ b/tests/test_encryption.py @@ -344,7 +344,7 @@ def test_aes_decrypt_corrupted_data(): aes.decrypt(secrets.token_bytes(num)) -@pytest.mark.samples() +@pytest.mark.samples def test_encrypt_stream_dictionary(pdf_file_path): user_password = secrets.token_urlsafe(10) diff --git a/tests/test_filters.py b/tests/test_filters.py index bfe963222..4e87d9c60 100644 --- a/tests/test_filters.py +++ b/tests/test_filters.py @@ -135,7 +135,7 @@ def test_ascii_hex_decode_missing_eod(): # assert exc.value.args[0] == "Unexpected EOD in ASCIIHexDecode" -@pytest.mark.enable_socket() +@pytest.mark.enable_socket def test_decode_ahx(): """ See #1979 @@ -223,7 +223,7 @@ def test_ccitt_fax_decode(): ) -@pytest.mark.enable_socket() +@pytest.mark.enable_socket def test_decompress_zlib_error(caplog): reader = PdfReader(BytesIO(get_data_from_url(name="tika-952445.pdf"))) for page in reader.pages: @@ -231,20 +231,20 @@ def test_decompress_zlib_error(caplog): assert "incorrect startxref pointer(3)" in caplog.text -@pytest.mark.enable_socket() +@pytest.mark.enable_socket def test_lzw_decode_neg1(): reader = PdfReader(BytesIO(get_data_from_url(name="tika-921632.pdf"))) page = reader.pages[47] assert page.extract_text().startswith("Chapter 2") -@pytest.mark.enable_socket() +@pytest.mark.enable_socket def test_issue_399(): reader = PdfReader(BytesIO(get_data_from_url(name="tika-976970.pdf"))) reader.pages[1].extract_text() -@pytest.mark.enable_socket() +@pytest.mark.enable_socket def test_image_without_pillow(tmp_path): 
import os @@ -279,7 +279,7 @@ def test_image_without_pillow(tmp_path): except KeyError: env["PYTHONPATH"] = "." result = subprocess.run( - [shutil.which("python"), source_file], # noqa: S603 + [shutil.which("python"), source_file], capture_output=True, env=env, ) @@ -291,7 +291,7 @@ def test_image_without_pillow(tmp_path): ) -@pytest.mark.enable_socket() +@pytest.mark.enable_socket def test_issue_1737(): reader = PdfReader(BytesIO(get_data_from_url(name="iss1737.pdf"))) reader.pages[0]["/Resources"]["/XObject"]["/Im0"].get_data() @@ -299,7 +299,7 @@ def test_issue_1737(): reader.pages[0]["/Resources"]["/XObject"]["/Im2"].get_data() -@pytest.mark.enable_socket() +@pytest.mark.enable_socket def test_pa_image_extraction(): """ PNG images with PA mode can be extracted. @@ -318,7 +318,7 @@ def test_pa_image_extraction(): assert data == images[0].data -@pytest.mark.enable_socket() +@pytest.mark.enable_socket def test_1bit_image_extraction(): """Cf issue #1814""" reader = PdfReader(BytesIO(get_data_from_url(name="grimm10"))) @@ -326,7 +326,7 @@ def test_1bit_image_extraction(): p.images -@pytest.mark.enable_socket() +@pytest.mark.enable_socket def test_png_transparency_reverse(): """Cf issue #1599""" pdf_path = RESOURCE_ROOT / "labeled-edges-center-image.pdf" @@ -340,7 +340,7 @@ def test_png_transparency_reverse(): # assert list(img.getdata()) == list(refimg.getdata()) -@pytest.mark.enable_socket() +@pytest.mark.enable_socket def test_iss1787(): """Cf issue #1787""" reader = PdfReader(BytesIO(get_data_from_url(name="pdf_font_garbled.pdf"))) @@ -357,7 +357,7 @@ def test_iss1787(): assert exc.value.args[0] == "Image data is not rectangular" -@pytest.mark.enable_socket() +@pytest.mark.enable_socket def test_tiff_predictor(): """Decode Tiff Predictor 2 Images""" reader = PdfReader(BytesIO(get_data_from_url(name="tika-977609.pdf"))) @@ -368,7 +368,7 @@ def test_tiff_predictor(): assert list(img.getdata()) == list(refimg.getdata()) -@pytest.mark.enable_socket() 
+@pytest.mark.enable_socket def test_rgba(): """Decode rgb with transparency""" with PILContext(): @@ -381,7 +381,7 @@ def test_rgba(): assert similarity > 0.99 -@pytest.mark.enable_socket() +@pytest.mark.enable_socket def test_cmyk(): """Decode cmyk""" # JPEG compression @@ -404,7 +404,7 @@ def test_cmyk(): assert image_similarity(data.image, refimg) > 0.999 # lossless compression expected -@pytest.mark.enable_socket() +@pytest.mark.enable_socket def test_iss1863(): """Test doc from iss1863""" reader = PdfReader(BytesIO(get_data_from_url(name="o1whh9b3.pdf"))) @@ -413,7 +413,7 @@ def test_iss1863(): i.name -@pytest.mark.enable_socket() +@pytest.mark.enable_socket def test_read_images(): reader = PdfReader(BytesIO(get_data_from_url(name="selbst.72916.pdf"))) page = reader.pages[0] @@ -421,7 +421,7 @@ def test_read_images(): pass -@pytest.mark.enable_socket() +@pytest.mark.enable_socket def test_cascaded_filters_images(): reader = PdfReader(BytesIO(get_data_from_url(name="iss1912.pdf"))) # for focus, analyse the page 23 @@ -430,13 +430,13 @@ def test_cascaded_filters_images(): _ = i.name, i.image -@pytest.mark.enable_socket() +@pytest.mark.enable_socket def test_calrgb(): reader = PdfReader(BytesIO(get_data_from_url(name="calRGB.pdf"))) reader.pages[0].images[0] -@pytest.mark.enable_socket() +@pytest.mark.enable_socket def test_index_lookup(): """The lookup is provided as an str and bytes""" reader = PdfReader(BytesIO(get_data_from_url(name="2023USDC.pdf"))) @@ -459,7 +459,7 @@ def test_index_lookup(): assert image_similarity(data.image, refimg) > 0.999 -@pytest.mark.enable_socket() +@pytest.mark.enable_socket def test_2bits_image(): """From #1954, test with 2bits image. 
TODO: 4bits also""" reader = PdfReader(BytesIO(get_data_from_url(name="paid.pdf"))) @@ -470,7 +470,7 @@ def test_2bits_image(): assert image_similarity(data.image, refimg) > 0.99 -@pytest.mark.enable_socket() +@pytest.mark.enable_socket def test_gray_devicen_cmyk(): """ Cf #1979 @@ -487,7 +487,7 @@ def test_gray_devicen_cmyk(): assert image_similarity(data.image, refimg) > 0.999 -@pytest.mark.enable_socket() +@pytest.mark.enable_socket def test_runlengthdecode(): """From #1954, test with 2bits image. TODO: 4bits also""" url = "https://github.com/py-pdf/pypdf/files/12159941/out.pdf" @@ -507,7 +507,7 @@ def test_runlengthdecode(): reader.pages[0].images[0] -@pytest.mark.enable_socket() +@pytest.mark.enable_socket def test_gray_separation_cmyk(): """ Cf #1955 @@ -524,7 +524,7 @@ def test_gray_separation_cmyk(): assert image_similarity(data.image, refimg) > 0.999 -@pytest.mark.enable_socket() +@pytest.mark.enable_socket def test_singleton_device(): """From #2023""" url = "https://github.com/py-pdf/pypdf/files/12177287/tt.pdf" @@ -533,7 +533,7 @@ def test_singleton_device(): reader.pages[0].images[0] -@pytest.mark.enable_socket() +@pytest.mark.enable_socket def test_jpx_no_spacecode(): """From #2061""" url = "https://github.com/py-pdf/pypdf/files/12253581/tt2.pdf" @@ -548,7 +548,7 @@ def test_jpx_no_spacecode(): assert exc.value.args[0].startswith("ColorSpace field not found") -@pytest.mark.enable_socket() +@pytest.mark.enable_socket def test_encodedstream_lookup(): """From #2124""" url = "https://github.com/py-pdf/pypdf/files/12455580/10.pdf" @@ -557,7 +557,7 @@ def test_encodedstream_lookup(): reader.pages[12].images[0] -@pytest.mark.enable_socket() +@pytest.mark.enable_socket def test_convert_1_to_la(): """From #2165""" url = "https://github.com/py-pdf/pypdf/files/12543290/whitepaper.WBT.token.blockchain.whitepaper.pdf" @@ -567,7 +567,7 @@ def test_convert_1_to_la(): _ = i -@pytest.mark.enable_socket() +@pytest.mark.enable_socket def 
test_nested_device_n_color_space(): """From #2240""" url = "https://github.com/py-pdf/pypdf/files/12814018/out1.pdf" @@ -576,7 +576,7 @@ def test_nested_device_n_color_space(): reader.pages[0].images[0] -@pytest.mark.enable_socket() +@pytest.mark.enable_socket @pytest.mark.skipif(not HAS_AES, reason="No AES implementation") def test_flate_decode_with_image_mode_1(): """From #2248""" @@ -587,7 +587,7 @@ def test_flate_decode_with_image_mode_1(): _ = image -@pytest.mark.enable_socket() +@pytest.mark.enable_socket def test_flate_decode_with_image_mode_1__whitespace_at_end_of_lookup(): """From #2331""" url = "https://github.com/py-pdf/pypdf/files/13611048/out1.pdf" diff --git a/tests/test_generic.py b/tests/test_generic.py index cce7ad7b2..4371853b2 100644 --- a/tests/test_generic.py +++ b/tests/test_generic.py @@ -618,7 +618,7 @@ def test_remove_child_in_tree(): tree.empty_tree() -@pytest.mark.enable_socket() +@pytest.mark.enable_socket @pytest.mark.parametrize( ("url", "name", "caplog_content"), [ @@ -666,8 +666,8 @@ def test_extract_text(caplog, url: str, name: str, caplog_content: str): assert caplog_content in caplog.text -@pytest.mark.slow() -@pytest.mark.enable_socket() +@pytest.mark.slow +@pytest.mark.enable_socket def test_text_string_write_to_stream(): url = "https://corpora.tika.apache.org/base/docs/govdocs1/924/924562.pdf" name = "tika-924562.pdf" @@ -679,7 +679,7 @@ def test_text_string_write_to_stream(): page.compress_content_streams() -@pytest.mark.enable_socket() +@pytest.mark.enable_socket def test_bool_repr(tmp_path): url = "https://corpora.tika.apache.org/base/docs/govdocs1/932/932449.pdf" name = "tika-932449.pdf" @@ -699,7 +699,7 @@ def test_bool_repr(tmp_path): ) -@pytest.mark.enable_socket() +@pytest.mark.enable_socket def test_issue_997(pdf_file_path): url = ( "https://github.com/py-pdf/pypdf/files/8908874/" @@ -880,7 +880,7 @@ def test_cloning(caplog): assert isinstance(obj21.get("/Test2"), IndirectObject) -@pytest.mark.enable_socket() 
+@pytest.mark.enable_socket def test_append_with_indirectobject_not_pointing(caplog): """ reported in #1631 @@ -895,7 +895,7 @@ def test_append_with_indirectobject_not_pointing(caplog): assert "Object 43 0 not defined." in caplog.text -@pytest.mark.enable_socket() +@pytest.mark.enable_socket def test_iss1615_1673(): """ test cases where /N is not indicating chains of objects @@ -921,7 +921,7 @@ def test_iss1615_1673(): writer.clone_document_from_reader(reader) -@pytest.mark.enable_socket() +@pytest.mark.enable_socket def test_destination_withoutzoom(): """Cf issue #1832""" url = "https://github.com/user-attachments/files/15605648/2021_book_security.pdf" @@ -972,7 +972,7 @@ def test_encodedstream_set_data(): aa.set_data(b"toto") -@pytest.mark.enable_socket() +@pytest.mark.enable_socket def test_set_data_2(): """ Modify a stream not yet loaded and @@ -988,7 +988,7 @@ def test_set_data_2(): assert writer.root_object["/AcroForm"]["/XFA"][7].get_object().get_data() == b"test" -@pytest.mark.enable_socket() +@pytest.mark.enable_socket def test_calling_indirect_objects(): """Cope with cases where attributes/items are called from indirectObject""" url = "https://github.com/user-attachments/files/15605648/2021_book_security.pdf" @@ -1008,7 +1008,7 @@ def test_calling_indirect_objects(): ind["/Type"] -@pytest.mark.enable_socket() +@pytest.mark.enable_socket def test_indirect_object_page_dimensions(): url = "https://github.com/py-pdf/pypdf/files/13302338/Zymeworks_Corporate.Presentation_FINAL1101.pdf.pdf" name = "issue2287.pdf" diff --git a/tests/test_images.py b/tests/test_images.py index 5fd7d0968..c0308eb3e 100644 --- a/tests/test_images.py +++ b/tests/test_images.py @@ -74,21 +74,21 @@ def image_similarity( return 1 - mse -@pytest.mark.samples() +@pytest.mark.samples def test_image_similarity_one(): path_a = SAMPLE_ROOT / "018-base64-image/page-0-QuickPDFImd32aa1ab.png" path_b = path_a assert image_similarity(path_a, path_b) == 1 -@pytest.mark.samples() 
+@pytest.mark.samples def test_image_similarity_zero(): path_a = SAMPLE_ROOT / "018-base64-image/page-0-QuickPDFImd32aa1ab.png" path_b = SAMPLE_ROOT / "009-pdflatex-geotopo/page-23-Im2.png" assert image_similarity(path_a, path_b) == 0 -@pytest.mark.samples() +@pytest.mark.samples def test_image_similarity_mid(): path_a = SAMPLE_ROOT / "018-base64-image/page-0-QuickPDFImd32aa1ab.png" img_b = Image.open(path_a) @@ -107,7 +107,7 @@ def test_image_similarity_mid(): assert sim2 > 0 -@pytest.mark.enable_socket() +@pytest.mark.enable_socket def test_image_new_property(): name = "pdf_font_garbled.pdf" reader = PdfReader(BytesIO(get_data_from_url(name=name))) @@ -203,7 +203,7 @@ def test_image_new_property(): "019-grayscale-image/page-0-X0.png", ], ) -@pytest.mark.samples() +@pytest.mark.samples def test_image_extraction(src, page_index, image_key, expected): reader = PdfReader(src) actual_image = reader.pages[page_index].images[image_key] @@ -214,7 +214,7 @@ def test_image_extraction(src, page_index, image_key, expected): assert image_similarity(BytesIO(actual_image.data), expected) >= 0.99 -@pytest.mark.enable_socket() +@pytest.mark.enable_socket @pytest.mark.timeout(30) def test_loop_in_image_keys(): """Cf #2077""" @@ -223,7 +223,7 @@ def test_loop_in_image_keys(): reader.pages[0].images.keys() -@pytest.mark.enable_socket() +@pytest.mark.enable_socket def test_devicen_cmyk_black_only(): """Cf #2321""" url = "https://github.com/py-pdf/pypdf/files/13501846/Addressing_Adversarial_Attacks.pdf" @@ -239,7 +239,7 @@ def test_devicen_cmyk_black_only(): assert image_similarity(reader.pages[10].images[0].image, img) >= 0.99 -@pytest.mark.enable_socket() +@pytest.mark.enable_socket def test_bi_in_text(): """Cf #2456""" url = "https://github.com/py-pdf/pypdf/files/14322910/BI_text_with_one_image.pdf" @@ -249,7 +249,7 @@ def test_bi_in_text(): assert reader.pages[0].images[0].name == "~0~.png" -@pytest.mark.enable_socket() +@pytest.mark.enable_socket def test_cmyk_no_filter(): """Cf 
#2522""" url = "https://github.com/py-pdf/pypdf/files/14614887/out3.pdf" @@ -258,7 +258,7 @@ def test_cmyk_no_filter(): reader.pages[0].images[0].image -@pytest.mark.enable_socket() +@pytest.mark.enable_socket def test_separation_1byte_to_rgb_inverted(): """Cf #2343""" url = "https://github.com/py-pdf/pypdf/files/13679585/test2_P038-038.pdf" @@ -274,7 +274,7 @@ def test_separation_1byte_to_rgb_inverted(): reader.pages[0].images[0] -@pytest.mark.enable_socket() +@pytest.mark.enable_socket def test_data_with_lf(): """Cf #2343""" url = "https://github.com/py-pdf/pypdf/files/13946477/panda.pdf" @@ -286,7 +286,7 @@ def test_data_with_lf(): assert image_similarity(reader.pages[8].images[9].image, img) == 1.0 -@pytest.mark.enable_socket() +@pytest.mark.enable_socket def test_oserror(): """Cf #2265""" url = "https://github.com/py-pdf/pypdf/files/13127130/Binance.discovery.responses.2.gov.uscourts.dcd.256060.140.1.pdf" @@ -316,7 +316,7 @@ def test_oserror(): ), ], ) -@pytest.mark.enable_socket() +@pytest.mark.enable_socket def test_corrupted_jpeg_iss2266(pdf, pdf_name, images, images_name, filtr): """ Code to create zipfile: @@ -348,7 +348,7 @@ def test_corrupted_jpeg_iss2266(pdf, pdf_name, images, images_name, filtr): assert image_similarity(reader.pages[p].images[i].image, img) >= 0.99 -@pytest.mark.enable_socket() +@pytest.mark.enable_socket @pytest.mark.timeout(30) def test_large_compressed_image(): url = "https://github.com/py-pdf/pypdf/files/15306199/file_with_large_compressed_image.pdf" @@ -358,7 +358,7 @@ def test_large_compressed_image(): list(reader.pages[0].images) -@pytest.mark.enable_socket() +@pytest.mark.enable_socket def test_ff_fe_starting_lut(): """Cf issue #2660""" url = "https://github.com/py-pdf/pypdf/files/15385628/original_before_merge.pdf" @@ -374,7 +374,7 @@ def test_ff_fe_starting_lut(): assert image_similarity(reader.pages[1].images[0].image, img) == 1.0 -@pytest.mark.enable_socket() +@pytest.mark.enable_socket def test_inline_image_extraction(): 
"""Cf #2598""" url = "https://github.com/py-pdf/pypdf/files/14982414/lebo102.pdf" @@ -443,7 +443,7 @@ def test_inline_image_extraction(): assert image_similarity(reader.pages[0].images[0].image, img) == 1 -@pytest.mark.enable_socket() +@pytest.mark.enable_socket def test_extract_image_from_object(caplog): url = "https://github.com/py-pdf/pypdf/files/15176076/B2.pdf" name = "iss2613.pdf" @@ -464,7 +464,7 @@ def test_extract_image_from_object(caplog): assert "does not seem to be an Image" in caplog.text -@pytest.mark.enable_socket() +@pytest.mark.enable_socket def test_4bits_images(caplog): url = "https://github.com/user-attachments/files/16624406/tt.pdf" name = "iss2411.pdf" diff --git a/tests/test_javascript.py b/tests/test_javascript.py index 37edb218a..094f8126d 100644 --- a/tests/test_javascript.py +++ b/tests/test_javascript.py @@ -12,7 +12,7 @@ RESOURCE_ROOT = PROJECT_ROOT / "resources" -@pytest.fixture() +@pytest.fixture def pdf_file_writer(): reader = PdfReader(RESOURCE_ROOT / "issue-604.pdf") writer = PdfWriter() diff --git a/tests/test_merger.py b/tests/test_merger.py index c9112eae3..c10e4b9be 100644 --- a/tests/test_merger.py +++ b/tests/test_merger.py @@ -224,7 +224,7 @@ def test_merge_write_closed_fh_with_writer(pdf_file_path): merger.add_outline_item("An outline item", 0) -@pytest.mark.enable_socket() +@pytest.mark.enable_socket def test_trim_outline_list_with_writer(pdf_file_path): url = "https://corpora.tika.apache.org/base/docs/govdocs1/995/995175.pdf" name = "tika-995175.pdf" @@ -236,7 +236,7 @@ def test_trim_outline_list_with_writer(pdf_file_path): merger.close() -@pytest.mark.enable_socket() +@pytest.mark.enable_socket def test_zoom_with_writer(pdf_file_path): url = "https://corpora.tika.apache.org/base/docs/govdocs1/994/994759.pdf" name = "tika-994759.pdf" @@ -247,7 +247,7 @@ def test_zoom_with_writer(pdf_file_path): merger.close() -@pytest.mark.enable_socket() +@pytest.mark.enable_socket 
@pytest.mark.filterwarnings("ignore::DeprecationWarning") def test_zoom_xyz_no_left_with_add_page(pdf_file_path): url = "https://corpora.tika.apache.org/base/docs/govdocs1/933/933322.pdf" @@ -260,7 +260,7 @@ def test_zoom_xyz_no_left_with_add_page(pdf_file_path): merger.close() -@pytest.mark.enable_socket() +@pytest.mark.enable_socket def test_zoom_xyz_no_left_with_writer(pdf_file_path): url = "https://corpora.tika.apache.org/base/docs/govdocs1/933/933322.pdf" name = "tika-933322.pdf" @@ -271,8 +271,8 @@ def test_zoom_xyz_no_left_with_writer(pdf_file_path): merger.close() -@pytest.mark.enable_socket() -@pytest.mark.slow() +@pytest.mark.enable_socket +@pytest.mark.slow def test_outline_item_with_writer(pdf_file_path): url = "https://corpora.tika.apache.org/base/docs/govdocs1/997/997511.pdf" name = "tika-997511.pdf" @@ -283,8 +283,8 @@ def test_outline_item_with_writer(pdf_file_path): merger.close() -@pytest.mark.enable_socket() -@pytest.mark.slow() +@pytest.mark.enable_socket +@pytest.mark.slow def test_trim_outline_with_writer(pdf_file_path): url = "https://corpora.tika.apache.org/base/docs/govdocs1/982/982336.pdf" name = "tika-982336.pdf" @@ -295,8 +295,8 @@ def test_trim_outline_with_writer(pdf_file_path): merger.close() -@pytest.mark.enable_socket() -@pytest.mark.slow() +@pytest.mark.enable_socket +@pytest.mark.slow def test1_with_writer(pdf_file_path): url = "https://corpora.tika.apache.org/base/docs/govdocs1/923/923621.pdf" name = "tika-923621.pdf" @@ -307,8 +307,8 @@ def test1_with_writer(pdf_file_path): merger.close() -@pytest.mark.enable_socket() -@pytest.mark.slow() +@pytest.mark.enable_socket +@pytest.mark.slow def test_sweep_recursion1_with_writer(pdf_file_path): # TODO: This test looks like an infinite loop. 
url = "https://corpora.tika.apache.org/base/docs/govdocs1/924/924546.pdf" @@ -323,8 +323,8 @@ def test_sweep_recursion1_with_writer(pdf_file_path): reader2.pages -@pytest.mark.enable_socket() -@pytest.mark.slow() +@pytest.mark.enable_socket +@pytest.mark.slow @pytest.mark.parametrize( ("url", "name"), [ @@ -350,7 +350,7 @@ def test_sweep_recursion2_with_writer(url, name, pdf_file_path): reader2.pages -@pytest.mark.enable_socket() +@pytest.mark.enable_socket def test_sweep_indirect_list_newobj_is_none_with_writer(caplog, pdf_file_path): url = "https://corpora.tika.apache.org/base/docs/govdocs1/906/906769.pdf" name = "tika-906769.pdf" @@ -365,7 +365,7 @@ def test_sweep_indirect_list_newobj_is_none_with_writer(caplog, pdf_file_path): reader2.pages -@pytest.mark.enable_socket() +@pytest.mark.enable_socket def test_iss1145_with_writer(): # issue with FitH destination with null param url = "https://github.com/py-pdf/pypdf/files/9164743/file-0.pdf" @@ -375,7 +375,7 @@ def test_iss1145_with_writer(): merger.close() -@pytest.mark.enable_socket() +@pytest.mark.enable_socket def test_iss1344_with_writer(caplog): url = "https://github.com/py-pdf/pypdf/files/9549001/input.pdf" name = "iss1344.pdf" @@ -388,7 +388,7 @@ def test_iss1344_with_writer(caplog): assert "adresse où le malade peut être visité" in p.extract_text() -@pytest.mark.enable_socket() +@pytest.mark.enable_socket def test_articles_with_writer(caplog): url = "https://corpora.tika.apache.org/base/docs/govdocs1/924/924666.pdf" name = "924666.pdf" diff --git a/tests/test_page.py b/tests/test_page.py index 39b1f4ec5..7ab59aaeb 100644 --- a/tests/test_page.py +++ b/tests/test_page.py @@ -46,7 +46,7 @@ def get_all_sample_files(): all_files_meta = get_all_sample_files() -@pytest.mark.samples() +@pytest.mark.samples @pytest.mark.parametrize( "meta", [m for m in all_files_meta["data"] if not m["encrypted"]], @@ -63,8 +63,8 @@ def test_read(meta): assert len(reader.pages) == meta["pages"] -@pytest.mark.samples() 
-@pytest.mark.enable_socket() +@pytest.mark.samples +@pytest.mark.enable_socket @pytest.mark.parametrize( ("pdf_path", "password"), [ @@ -253,7 +253,7 @@ def compare_dict_objects(d1, d2): assert d1[key] == d2[key] -@pytest.mark.slow() +@pytest.mark.slow def test_page_transformations(): pdf_path = RESOURCE_ROOT / "crazyones.pdf" reader = PdfReader(pdf_path) @@ -375,7 +375,7 @@ def test_add_transformation_on_page_without_contents(): assert isinstance(page.get_contents(), ContentStream) -@pytest.mark.enable_socket() +@pytest.mark.enable_socket def test_iss_1142(): # check fix for problem of context save/restore (q/Q) url = "https://github.com/py-pdf/pypdf/files/9150656/ST.2019.PDF" @@ -393,8 +393,8 @@ def test_iss_1142(): assert txt.find("郑州分公司") > 0 -@pytest.mark.enable_socket() -@pytest.mark.slow() +@pytest.mark.enable_socket +@pytest.mark.slow @pytest.mark.parametrize( ("url", "name"), [ @@ -439,8 +439,8 @@ def test_extract_text(url, name): page.extract_text() -@pytest.mark.enable_socket() -@pytest.mark.slow() +@pytest.mark.enable_socket +@pytest.mark.slow def test_extract_text_page_pdf_impossible_decode_xform(caplog): url = "https://corpora.tika.apache.org/base/docs/govdocs1/972/972962.pdf" name = "tika-972962.pdf" @@ -451,8 +451,8 @@ def test_extract_text_page_pdf_impossible_decode_xform(caplog): assert warn_msgs == [""] # text extraction recognise no text -@pytest.mark.enable_socket() -@pytest.mark.slow() +@pytest.mark.enable_socket +@pytest.mark.slow def test_extract_text_operator_t_star(): # L1266, L1267 url = "https://corpora.tika.apache.org/base/docs/govdocs1/967/967943.pdf" name = "tika-967943.pdf" @@ -801,7 +801,7 @@ def test_get_fonts(pdf_path, password, embedded, unembedded): assert (a, b) == (embedded, unembedded) -@pytest.mark.enable_socket() +@pytest.mark.enable_socket def test_get_fonts2(): url = "https://github.com/py-pdf/pypdf/files/12618104/WS_T.483.8-2016.pdf" name = "WS_T.483.8-2016.pdf" @@ -940,7 +940,7 @@ def 
test_annotation_setter(pdf_file_path): writer.write(fp) -@pytest.mark.enable_socket() +@pytest.mark.enable_socket @pytest.mark.xfail(reason="#1091") def test_text_extraction_issue_1091(): url = "https://corpora.tika.apache.org/base/docs/govdocs1/966/966635.pdf" @@ -952,7 +952,7 @@ def test_text_extraction_issue_1091(): page.extract_text() -@pytest.mark.enable_socket() +@pytest.mark.enable_socket def test_empyt_password_1088(): url = "https://corpora.tika.apache.org/base/docs/govdocs1/941/941536.pdf" name = "tika-941536.pdf" @@ -961,7 +961,7 @@ def test_empyt_password_1088(): len(reader.pages) -@pytest.mark.enable_socket() +@pytest.mark.enable_socket def test_old_habibi(): # this habibi has multiple characters associated with the h reader = PdfReader(SAMPLE_ROOT / "015-arabic/habibi.pdf") @@ -971,7 +971,7 @@ def test_old_habibi(): assert "حَبيبي" in txt -@pytest.mark.samples() +@pytest.mark.samples def test_read_link_annotation(): reader = PdfReader(SAMPLE_ROOT / "016-libre-office-link/libre-office-link.pdf") assert len(reader.pages[0].annotations) == 1 @@ -1001,7 +1001,7 @@ def test_read_link_annotation(): assert annot == expected -@pytest.mark.enable_socket() +@pytest.mark.enable_socket def test_no_resources(): url = "https://github.com/py-pdf/pypdf/files/9572045/108.pdf" name = "108.pdf" @@ -1174,7 +1174,7 @@ def test_merge_page_resources_smoke_test(): assert relevant_operations == expected_operations -@pytest.mark.enable_socket() +@pytest.mark.enable_socket def test_merge_transformed_page_into_blank(): url = "https://github.com/py-pdf/pypdf/files/10768334/badges_3vjrh_7LXDZ_1-1.pdf" name = "badges_3vjrh_7LXDZ_1.pdf" @@ -1221,7 +1221,7 @@ def test_pages_printing(): reader.pages[0].images["~1~"] -@pytest.mark.enable_socket() +@pytest.mark.enable_socket def test_del_pages(): url = "https://corpora.tika.apache.org/base/docs/govdocs1/941/941536.pdf" name = "tika-941536.pdf" @@ -1280,7 +1280,7 @@ def test_pdf_pages_missing_type(): writer.pages[0] 
-@pytest.mark.enable_socket() +@pytest.mark.enable_socket def test_merge_with_stream_wrapped_in_save_restore(): """Test for issue #2587""" url = "https://github.com/py-pdf/pypdf/files/14895914/blank_portrait.pdf" @@ -1293,7 +1293,7 @@ def test_merge_with_stream_wrapped_in_save_restore(): assert b"QQ" not in page_one.get_contents().get_data() -@pytest.mark.samples() +@pytest.mark.samples def test_compression(): """Test for issue #1897""" @@ -1352,7 +1352,7 @@ def test_get_contents_from_nullobject(): page1.merge_page(page2, over=True) -@pytest.mark.enable_socket() +@pytest.mark.enable_socket def test_pos_text_in_textvisitor(): """See #2200""" url = "https://github.com/py-pdf/pypdf/files/12675974/page_178.pdf" @@ -1370,7 +1370,7 @@ def visitor_body2(text, cm, tm, fontdict, fontsize) -> None: assert abs(p[1] - 457.4) < 0.1 -@pytest.mark.enable_socket() +@pytest.mark.enable_socket def test_pos_text_in_textvisitor2(): """See #2075""" url = "https://github.com/py-pdf/pypdf/files/12318042/LegIndex-page6.pdf" @@ -1430,7 +1430,7 @@ def visitor_lvl(text, cm, tm, fontdict, fontsize) -> None: ] -@pytest.mark.enable_socket() +@pytest.mark.enable_socket def test_missing_basefont_in_type3(): """Cf #2289""" url = "https://github.com/py-pdf/pypdf/files/13307713/missing-base-font.pdf" diff --git a/tests/test_page_labels.py b/tests/test_page_labels.py index 3b6927231..2e0926dec 100644 --- a/tests/test_page_labels.py +++ b/tests/test_page_labels.py @@ -75,7 +75,7 @@ def test_number2uppercase_letter(): number2uppercase_letter(-1) -@pytest.mark.enable_socket() +@pytest.mark.enable_socket def test_index2label(caplog): name = "waarom-meisjes-het-beter-doen-op-HAVO-en-VWO-ROA.pdf" r = PdfReader(BytesIO(get_data_from_url(name=name))) @@ -112,7 +112,7 @@ def test_index2label(caplog): assert caplog.text != "" -@pytest.mark.enable_socket() +@pytest.mark.enable_socket def test_index2label_kids(): url = 
"https://github.com/py-pdf/pypdf/files/14858124/Terminologie_Epochen.Schwerpunkte.Umsetzungen.pdf" r = PdfReader(BytesIO(get_data_from_url(url=url, name="index2label_kids.pdf"))) @@ -143,7 +143,7 @@ def test_index2label_kids(): assert r.page_labels == expected -@pytest.mark.enable_socket() +@pytest.mark.enable_socket def test_index2label_kids__recursive(caplog): url = "https://github.com/py-pdf/pypdf/files/14842446/tt1.pdf" r = PdfReader( diff --git a/tests/test_pdfa.py b/tests/test_pdfa.py index 7ad35fe86..4ed7a4a7f 100644 --- a/tests/test_pdfa.py +++ b/tests/test_pdfa.py @@ -32,7 +32,7 @@ def document_information_has_analoguos_xml(src: BytesIO) -> bool: return document_information_has_analoguos_xml(src) -@pytest.mark.samples() +@pytest.mark.samples @pytest.mark.parametrize( ("src", "diagnostic_write_name"), [ diff --git a/tests/test_reader.py b/tests/test_reader.py index d3cdc875d..e1b7fc26d 100644 --- a/tests/test_reader.py +++ b/tests/test_reader.py @@ -130,7 +130,7 @@ def test_iss1943(): assert docinfo.creation_date is None -@pytest.mark.samples() +@pytest.mark.samples @pytest.mark.parametrize( "pdf_path", [SAMPLE_ROOT / "017-unreadable-meta-data/unreadablemetadata.pdf"] ) @@ -198,7 +198,7 @@ def test_get_outline(src, outline_elements): assert len(outline) == outline_elements -@pytest.mark.samples() +@pytest.mark.samples @pytest.mark.parametrize( ("src", "expected_images"), [ @@ -656,7 +656,7 @@ def test_do_not_get_stuck_on_large_files_without_start_xref(): assert parse_duration < 60 -@pytest.mark.enable_socket() +@pytest.mark.enable_socket def test_decrypt_when_no_id(): """ Decrypt an encrypted file that's missing the 'ID' value in its trailer. 
@@ -811,7 +811,7 @@ def test_convert_to_int_error(): assert exc.value.args[0] == "Invalid size in convert_to_int" -@pytest.mark.enable_socket() +@pytest.mark.enable_socket def test_iss925(): url = "https://github.com/py-pdf/pypdf/files/8796328/1.pdf" reader = PdfReader(BytesIO(get_data_from_url(url, name="iss925.pdf"))) @@ -867,7 +867,7 @@ def test_read_not_binary_mode(caplog): assert normalize_warnings(caplog.text) == [msg] -@pytest.mark.enable_socket() +@pytest.mark.enable_socket @pytest.mark.skipif(not HAS_AES, reason="No AES algorithm available") def test_read_form_416(): url = ( @@ -906,7 +906,7 @@ def test_form_topname_with_and_without_acroform(caplog): assert "have a non-expected parent" in caplog.text -@pytest.mark.enable_socket() +@pytest.mark.enable_socket def test_extract_text_xref_issue_2(caplog): # pdf/0264cf510015b2a4b395a15cb23c001e.pdf url = "https://corpora.tika.apache.org/base/docs/govdocs1/981/981961.pdf" @@ -920,8 +920,8 @@ def test_extract_text_xref_issue_2(caplog): assert normalize_warnings(caplog.text) == msg -@pytest.mark.enable_socket() -@pytest.mark.slow() +@pytest.mark.enable_socket +@pytest.mark.slow def test_extract_text_xref_issue_3(caplog): # pdf/0264cf510015b2a4b395a15cb23c001e.pdf url = "https://corpora.tika.apache.org/base/docs/govdocs1/977/977774.pdf" @@ -934,7 +934,7 @@ def test_extract_text_xref_issue_3(caplog): assert normalize_warnings(caplog.text) == msg -@pytest.mark.enable_socket() +@pytest.mark.enable_socket def test_extract_text_pdf15(): # pdf/0264cf510015b2a4b395a15cb23c001e.pdf url = "https://corpora.tika.apache.org/base/docs/govdocs1/976/976030.pdf" @@ -943,7 +943,7 @@ def test_extract_text_pdf15(): page.extract_text() -@pytest.mark.enable_socket() +@pytest.mark.enable_socket def test_extract_text_xref_table_21_bytes_clrf(): # pdf/0264cf510015b2a4b395a15cb23c001e.pdf url = "https://corpora.tika.apache.org/base/docs/govdocs1/956/956939.pdf" @@ -952,7 +952,7 @@ def test_extract_text_xref_table_21_bytes_clrf(): 
page.extract_text() -@pytest.mark.enable_socket() +@pytest.mark.enable_socket def test_get_fields(): url = "https://corpora.tika.apache.org/base/docs/govdocs1/972/972486.pdf" name = "tika-972486.pdf" @@ -965,7 +965,7 @@ def test_get_fields(): ) -@pytest.mark.enable_socket() +@pytest.mark.enable_socket def test_get_full_qualified_fields(): url = "https://github.com/py-pdf/pypdf/files/10142389/fields_with_dots.pdf" name = "fields_with_dots.pdf" @@ -985,7 +985,7 @@ def test_get_full_qualified_fields(): assert fields["customer.name"]["/T"] == "name" -@pytest.mark.enable_socket() +@pytest.mark.enable_socket @pytest.mark.filterwarnings("ignore::pypdf.errors.PdfReadWarning") def test_get_fields_read_else_block(): # covers also issue 1089 @@ -994,7 +994,7 @@ def test_get_fields_read_else_block(): PdfReader(BytesIO(get_data_from_url(url, name=name))) -@pytest.mark.enable_socket() +@pytest.mark.enable_socket def test_get_fields_read_else_block2(): url = "https://corpora.tika.apache.org/base/docs/govdocs1/914/914902.pdf" name = "tika-914902.pdf" @@ -1003,7 +1003,7 @@ def test_get_fields_read_else_block2(): assert fields is None -@pytest.mark.enable_socket() +@pytest.mark.enable_socket @pytest.mark.filterwarnings("ignore::pypdf.errors.PdfReadWarning") def test_get_fields_read_else_block3(): url = "https://corpora.tika.apache.org/base/docs/govdocs1/957/957721.pdf" @@ -1011,7 +1011,7 @@ def test_get_fields_read_else_block3(): PdfReader(BytesIO(get_data_from_url(url, name=name))) -@pytest.mark.enable_socket() +@pytest.mark.enable_socket def test_metadata_is_none(): url = "https://corpora.tika.apache.org/base/docs/govdocs1/963/963692.pdf" name = "tika-963692.pdf" @@ -1019,7 +1019,7 @@ def test_metadata_is_none(): assert reader.metadata is None -@pytest.mark.enable_socket() +@pytest.mark.enable_socket def test_get_fields_read_write_report(txt_file_path): url = "https://corpora.tika.apache.org/base/docs/govdocs1/909/909655.pdf" name = "tika-909655.pdf" @@ -1041,7 +1041,7 @@ def 
test_xfa(src): assert reader.xfa is None -@pytest.mark.enable_socket() +@pytest.mark.enable_socket def test_xfa_non_empty(): url = "https://corpora.tika.apache.org/base/docs/govdocs1/942/942050.pdf" name = "tika-942050.pdf" @@ -1069,13 +1069,13 @@ def test_header(src, pdf_header): assert reader.pdf_header == pdf_header -@pytest.mark.enable_socket() +@pytest.mark.enable_socket def test_outline_color(): reader = PdfReader(BytesIO(get_data_from_url(name="tika-924546.pdf"))) assert reader.outline[0].color == [0, 0, 1] -@pytest.mark.enable_socket() +@pytest.mark.enable_socket def test_outline_font_format(): reader = PdfReader(BytesIO(get_data_from_url(name="tika-924546.pdf"))) assert reader.outline[0].font_format == 2 @@ -1094,7 +1094,7 @@ def get_outline_property(outline, attribute_name: str): return results -@pytest.mark.samples() +@pytest.mark.samples def test_outline_title_issue_1121(): reader = PdfReader(SAMPLE_ROOT / "014-outlines/mistitled_outlines_example.pdf") @@ -1141,7 +1141,7 @@ def test_outline_title_issue_1121(): ] -@pytest.mark.samples() +@pytest.mark.samples def test_outline_count(): reader = PdfReader(SAMPLE_ROOT / "014-outlines/mistitled_outlines_example.pdf") @@ -1200,7 +1200,7 @@ def test_outline_missing_title(caplog): assert reader.outline[0]["/Title"] == "" -@pytest.mark.enable_socket() +@pytest.mark.enable_socket @pytest.mark.parametrize( ("url", "name"), [ @@ -1223,7 +1223,7 @@ def test_named_destination(url, name): assert len(reader.named_destinations) > 0 -@pytest.mark.enable_socket() +@pytest.mark.enable_socket def test_outline_with_missing_named_destination(): url = "https://corpora.tika.apache.org/base/docs/govdocs1/913/913678.pdf" name = "tika-913678.pdf" @@ -1232,7 +1232,7 @@ def test_outline_with_missing_named_destination(): assert reader.outline[1][0].title.startswith("Report for 2002AZ3B: Microbial") -@pytest.mark.enable_socket() +@pytest.mark.enable_socket def test_outline_with_empty_action(): url = 
"https://corpora.tika.apache.org/base/docs/govdocs1/924/924546.pdf" name = "tika-924546.pdf" @@ -1249,7 +1249,7 @@ def test_outline_with_invalid_destinations(): assert len(reader.outline) == 9 -@pytest.mark.enable_socket() +@pytest.mark.enable_socket def test_pdfreader_multiple_definitions(caplog): """iss325""" url = "https://github.com/py-pdf/pypdf/files/9176644/multipledefs.pdf" @@ -1275,7 +1275,7 @@ def test_get_page_number_by_indirect(): reader._get_page_number_by_indirect(1) -@pytest.mark.enable_socket() +@pytest.mark.enable_socket def test_corrupted_xref_table(): # issue #1292 url = "https://github.com/py-pdf/pypdf/files/9444747/BreezeManual.orig.pdf" @@ -1288,7 +1288,7 @@ def test_corrupted_xref_table(): reader.pages[0].extract_text() -@pytest.mark.enable_socket() +@pytest.mark.enable_socket def test_reader(caplog): # iss #1273 url = "https://github.com/py-pdf/pypdf/files/9464742/shiv_resume.pdf" @@ -1304,7 +1304,7 @@ def test_reader(caplog): assert caplog.text == "" -@pytest.mark.enable_socket() +@pytest.mark.enable_socket def test_zeroing_xref(): # iss #328 url = ( @@ -1316,7 +1316,7 @@ def test_zeroing_xref(): len(reader.pages) -@pytest.mark.enable_socket() +@pytest.mark.enable_socket def test_thread(): url = ( "https://github.com/py-pdf/pypdf/files/9066120/" @@ -1332,7 +1332,7 @@ def test_thread(): assert len(reader.threads) >= 1 -@pytest.mark.enable_socket() +@pytest.mark.enable_socket def test_build_outline_item(caplog): url = "https://github.com/py-pdf/pypdf/files/9464742/shiv_resume.pdf" name = "shiv_resume.pdf" @@ -1360,7 +1360,7 @@ def test_build_outline_item(caplog): assert "Unexpected destination 2" in exc.value.args[0] -@pytest.mark.samples() +@pytest.mark.samples @pytest.mark.parametrize( ("src", "page_labels"), [ @@ -1384,7 +1384,7 @@ def test_page_labels(src, page_labels): assert PdfReader(src).page_labels[:max_indices] == page_labels[:max_indices] -@pytest.mark.enable_socket() +@pytest.mark.enable_socket def test_iss1559(): url = 
"https://github.com/py-pdf/pypdf/files/10441992/default.pdf" name = "iss1559.pdf" @@ -1393,7 +1393,7 @@ def test_iss1559(): p.extract_text() -@pytest.mark.enable_socket() +@pytest.mark.enable_socket def test_iss1652(): # test of an annotation(link) directly stored in the /Annots in the page url = "https://github.com/py-pdf/pypdf/files/10818844/tt.pdf" @@ -1402,7 +1402,7 @@ def test_iss1652(): reader.named_destinations -@pytest.mark.enable_socket() +@pytest.mark.enable_socket def test_iss1689(): url = "https://github.com/py-pdf/pypdf/files/10948283/error_file_without_data.pdf" name = "iss1689.pdf" @@ -1410,7 +1410,7 @@ def test_iss1689(): reader.pages[0] -@pytest.mark.enable_socket() +@pytest.mark.enable_socket def test_iss1710(): url = "https://github.com/py-pdf/pypdf/files/15234776/irbookonlinereading.pdf" name = "irbookonlinereading.pdf" @@ -1452,7 +1452,7 @@ def test_broken_file_header(): PdfReader(io.BytesIO(pdf_data)) -@pytest.mark.enable_socket() +@pytest.mark.enable_socket def test_iss1756(): url = "https://github.com/py-pdf/pypdf/files/11105591/641-Attachment-B-Pediatric-Cardiac-Arrest-8-1-2019.pdf" name = "iss1756.pdf" @@ -1461,7 +1461,7 @@ def test_iss1756(): # removed to cope with missing cryptodome during commit check : len(reader.pages) -@pytest.mark.enable_socket() +@pytest.mark.enable_socket @pytest.mark.timeout(30) def test_iss1825(): url = "https://github.com/py-pdf/pypdf/files/11367871/MiFO_LFO_FEIS_NOA_Published.3.pdf" @@ -1471,7 +1471,7 @@ def test_iss1825(): page.extract_text() -@pytest.mark.enable_socket() +@pytest.mark.enable_socket def test_iss2082(): url = "https://github.com/py-pdf/pypdf/files/12317939/test.pdf" name = "iss2082.pdf" @@ -1485,7 +1485,7 @@ def test_iss2082(): reader = PdfReader(BytesIO(bb)) -@pytest.mark.enable_socket() +@pytest.mark.enable_socket def test_issue_140(): url = "https://github.com/py-pdf/pypdf/files/12168578/bad_pdf_example.pdf" name = "issue-140.pdf" @@ -1494,7 +1494,7 @@ def test_issue_140(): assert 
len(reader.pages) == 54 -@pytest.mark.enable_socket() +@pytest.mark.enable_socket def test_xyz_with_missing_param(): """Cf #2236""" url = "https://github.com/py-pdf/pypdf/files/12795356/tt1.pdf" @@ -1506,7 +1506,7 @@ def test_xyz_with_missing_param(): assert reader.outline[0]["/Top"] == 0 -@pytest.mark.enable_socket() +@pytest.mark.enable_socket def test_corrupted_xref(): url = "https://github.com/py-pdf/pypdf/files/14628314/iss2516.pdf" name = "iss2516.pdf" @@ -1514,7 +1514,7 @@ def test_corrupted_xref(): assert reader.root_object["/Type"] == "/Catalog" -@pytest.mark.enable_socket() +@pytest.mark.enable_socket def test_truncated_xref(caplog): url = "https://github.com/py-pdf/pypdf/files/14843553/002-trivial-libre-office-writer-broken.pdf" name = "iss2575.pdf" @@ -1522,7 +1522,7 @@ def test_truncated_xref(caplog): assert "Invalid/Truncated xref table. Rebuilding it." in caplog.text -@pytest.mark.enable_socket() +@pytest.mark.enable_socket def test_damaged_pdf(): url = "https://github.com/py-pdf/pypdf/files/15186107/malformed_pdf.pdf" name = "malformed_pdf.pdf" @@ -1536,7 +1536,7 @@ def test_damaged_pdf(): ) -@pytest.mark.enable_socket() +@pytest.mark.enable_socket @pytest.mark.timeout(10) def test_looping_form(caplog): """Cf iss 2643""" @@ -1598,7 +1598,7 @@ def test_context_manager_with_stream(): assert not pdf_stream.closed -@pytest.mark.enable_socket() +@pytest.mark.enable_socket @pytest.mark.timeout(10) def test_iss2761(): url = "https://github.com/user-attachments/files/16312198/crash-b26d05712a29b241ac6f9dc7fff57428ba2d1a04.pdf" @@ -1608,7 +1608,7 @@ def test_iss2761(): reader.pages[0].extract_text() -@pytest.mark.enable_socket() +@pytest.mark.enable_socket def test_iss2817(): """Test for rebuiling Xref_ObjStm""" url = "https://github.com/user-attachments/files/16764070/crash-7e1356f1179b4198337f282304cb611aea26a199.pdf" @@ -1620,7 +1620,7 @@ def test_iss2817(): ) -@pytest.mark.enable_socket() +@pytest.mark.enable_socket def test_truncated_files(caplog): 
"""Cf #2853""" url = "https://github.com/user-attachments/files/16796095/f5471sm-2.pdf" @@ -1644,7 +1644,7 @@ def test_truncated_files(caplog): assert reader._startxref < 100993 -@pytest.mark.enable_socket() +@pytest.mark.enable_socket def test_comments_in_array(caplog): """Cf #2843: this deals with comments""" url = "https://github.com/user-attachments/files/16992416/crash-2347912aa2a6f0fab5df4ebc8a424735d5d0d128.pdf" @@ -1659,7 +1659,7 @@ def test_comments_in_array(caplog): reader.pages[0] -@pytest.mark.enable_socket() +@pytest.mark.enable_socket def test_space_in_names_to_continue_processing(caplog): """ This deals with space not encoded in names inducing errors. @@ -1705,7 +1705,7 @@ def test_space_in_names_to_continue_processing(caplog): obj = reader.get_object(70) -@pytest.mark.enable_socket() +@pytest.mark.enable_socket def test_unbalanced_brackets_in_dictionary_object(caplog): """Cf #2877""" url = "https://github.com/user-attachments/files/17162634/7f40cb209fb97d1782bffcefc5e7be40.pdf" @@ -1714,7 +1714,7 @@ def test_unbalanced_brackets_in_dictionary_object(caplog): assert len(reader.pages) == 43 # note: /Count = 46 but 3 kids are None -@pytest.mark.enable_socket() +@pytest.mark.enable_socket def test_repair_root(caplog): """Cf #2877""" url = "https://github.com/user-attachments/files/17162216/crash-6620e8b1abfe3da639b654595da859b87f985748.pdf" diff --git a/tests/test_text_extraction.py b/tests/test_text_extraction.py index 1908e7f15..c04d4ceb0 100644 --- a/tests/test_text_extraction.py +++ b/tests/test_text_extraction.py @@ -21,7 +21,7 @@ SAMPLE_ROOT = PROJECT_ROOT / "sample-files" -@pytest.mark.samples() +@pytest.mark.samples @pytest.mark.parametrize(("visitor_text"), [None, lambda a, b, c, d, e: None]) def test_multi_language(visitor_text): reader = PdfReader(RESOURCE_ROOT / "multilang.pdf") @@ -108,7 +108,7 @@ def visitor_text(text, cm, tm, font_dict, font_size) -> None: @pytest.mark.xfail(reason="known whitespace issue #2336") 
-@pytest.mark.enable_socket() +@pytest.mark.enable_socket def test_issue_2336(): name = "Pesquisa-de-Precos-Combustiveis-novembro-2023.pdf" reader = PdfReader(BytesIO(get_data_from_url(name=name))) @@ -131,7 +131,7 @@ def test_layout_mode_font_class_to_dict(): } -@pytest.mark.enable_socket() +@pytest.mark.enable_socket def test_layout_mode_epic_page_fonts(): url = "https://github.com/py-pdf/pypdf/files/13836944/Epic.Page.PDF" name = "Epic Page.PDF" @@ -147,7 +147,7 @@ def test_layout_mode_uncommon_operators(): assert expected == reader.pages[0].extract_text(extraction_mode="layout") -@pytest.mark.enable_socket() +@pytest.mark.enable_socket def test_layout_mode_type0_font_widths(): # Cover both the 'int int int' and 'int [int int ...]' formats for Type0 # /DescendantFonts /W array entries. @@ -160,7 +160,7 @@ def test_layout_mode_type0_font_widths(): assert expected == reader.pages[0].extract_text(extraction_mode="layout") -@pytest.mark.enable_socket() +@pytest.mark.enable_socket def test_layout_mode_indirect_sequence_font_widths(): # Cover the situation where the sequence for font widths is an IndirectObject # ref https://github.com/py-pdf/pypdf/pull/2788 @@ -191,7 +191,7 @@ def test_layout_mode_warnings(mock_logger_warning): ) -@pytest.mark.enable_socket() +@pytest.mark.enable_socket def test_space_with_one_unit_smaller_than_font_width(): """Tests for #1328""" url = "https://github.com/py-pdf/pypdf/files/9498481/0004.pdf" @@ -202,7 +202,7 @@ def test_space_with_one_unit_smaller_than_font_width(): assert "Reporting crude oil leak.\n" in extracted -@pytest.mark.enable_socket() +@pytest.mark.enable_socket def test_space_position_calculation(): """Tests for #1153""" url = "https://github.com/py-pdf/pypdf/files/9164743/file-0.pdf" diff --git a/tests/test_workflows.py b/tests/test_workflows.py index 538502348..daaf84f05 100644 --- a/tests/test_workflows.py +++ b/tests/test_workflows.py @@ -191,8 +191,8 @@ def test_rotate_45(): assert exc.value.args[0] == "Rotation angle 
must be a multiple of 90" -@pytest.mark.enable_socket() -@pytest.mark.slow() +@pytest.mark.enable_socket +@pytest.mark.slow @pytest.mark.parametrize( ("enable", "url", "pages"), [ @@ -273,7 +273,7 @@ def test_extract_textbench(enable, url, pages, print_result=False): pass -@pytest.mark.slow() +@pytest.mark.slow def test_orientations(): p = PdfReader(RESOURCE_ROOT / "test Orient.pdf").pages[0] p.extract_text("", "") @@ -308,8 +308,8 @@ def test_orientations(): ), f"extract_text({req}) => {rst}" -@pytest.mark.samples() -@pytest.mark.enable_socket() +@pytest.mark.samples +@pytest.mark.enable_socket @pytest.mark.parametrize( ("base_path", "overlay_path"), [ @@ -341,8 +341,8 @@ def test_overlay(pdf_file_path, base_path, overlay_path): writer.write(fp) -@pytest.mark.enable_socket() -@pytest.mark.slow() +@pytest.mark.enable_socket +@pytest.mark.slow @pytest.mark.parametrize( ("url", "name"), [ @@ -362,7 +362,7 @@ def test_merge_with_warning(tmp_path, url, name): merger.write(tmp_path / "tmp.merged.pdf") -@pytest.mark.enable_socket() +@pytest.mark.enable_socket @pytest.mark.parametrize( ("url", "name"), [ @@ -381,7 +381,7 @@ def test_merge(tmp_path, url, name): merger.write(tmp_path / "tmp.merged.pdf") -@pytest.mark.enable_socket() +@pytest.mark.enable_socket @pytest.mark.parametrize( ("url", "name", "expected_metadata"), [ @@ -407,7 +407,7 @@ def test_get_metadata(url, name, expected_metadata): assert expected_metadata == data -@pytest.mark.enable_socket() +@pytest.mark.enable_socket @pytest.mark.parametrize( ("url", "name", "strict", "exception"), [ @@ -493,7 +493,7 @@ def test_extract_text(url, name, strict, exception): assert ex_info.value.args[0] == exc_text -@pytest.mark.enable_socket() +@pytest.mark.enable_socket @pytest.mark.parametrize( ("url", "name"), [ @@ -529,7 +529,7 @@ def test_compress_raised(url, name): page.compress_content_streams() -@pytest.mark.enable_socket() +@pytest.mark.enable_socket @pytest.mark.parametrize( ("url", "name"), [ @@ -554,7 +554,7 @@ 
def test_get_fields_warns(tmp_path, caplog, url, name): ] -@pytest.mark.enable_socket() +@pytest.mark.enable_socket @pytest.mark.parametrize( ("url", "name"), [ @@ -574,7 +574,7 @@ def test_get_fields_no_warning(tmp_path, url, name): assert len(retrieved_fields) == 10 -@pytest.mark.enable_socket() +@pytest.mark.enable_socket def test_scale_rectangle_indirect_object(): url = "https://corpora.tika.apache.org/base/docs/govdocs1/999/999944.pdf" name = "tika-999944.pdf" @@ -613,7 +613,7 @@ def test_merge_output(caplog): merger.close() -@pytest.mark.enable_socket() +@pytest.mark.enable_socket @pytest.mark.parametrize( ("url", "name"), [ @@ -685,7 +685,7 @@ def test_image_extraction(url, name): Path(filepath).unlink() -@pytest.mark.enable_socket() +@pytest.mark.enable_socket def test_image_extraction_strict(): # Emits log messages url = "https://corpora.tika.apache.org/base/docs/govdocs1/914/914102.pdf" @@ -713,7 +713,7 @@ def test_image_extraction_strict(): Path(filepath).unlink() -@pytest.mark.enable_socket() +@pytest.mark.enable_socket @pytest.mark.parametrize( ("url", "name"), [ @@ -747,7 +747,7 @@ def test_image_extraction2(url, name): Path(filepath).unlink() -@pytest.mark.enable_socket() +@pytest.mark.enable_socket @pytest.mark.parametrize( ("url", "name"), [ @@ -767,7 +767,7 @@ def test_get_outline(url, name): reader.outline -@pytest.mark.enable_socket() +@pytest.mark.enable_socket @pytest.mark.parametrize( ("url", "name"), [ @@ -787,7 +787,7 @@ def test_get_xfa(url, name): reader.xfa -@pytest.mark.enable_socket() +@pytest.mark.enable_socket @pytest.mark.parametrize( ("url", "name", "strict"), [ @@ -820,7 +820,7 @@ def test_get_fonts(url, name, strict): page._get_fonts() -@pytest.mark.enable_socket() +@pytest.mark.enable_socket @pytest.mark.parametrize( ("url", "name", "strict"), [ @@ -878,7 +878,7 @@ def test_get_xmp(url, name, strict): xmp_info.custom_properties -@pytest.mark.enable_socket() +@pytest.mark.enable_socket def test_tounicode_is_identity(): url = 
"https://github.com/py-pdf/pypdf/files/9998335/FP_Thesis.pdf" name = "FP_Thesis.pdf" @@ -887,7 +887,7 @@ def test_tounicode_is_identity(): reader.pages[0].extract_text() -@pytest.mark.enable_socket() +@pytest.mark.enable_socket def test_append_forms(): # from #1538 writer = PdfWriter() @@ -912,7 +912,7 @@ def test_append_forms(): ) + len(reader2.get_form_text_fields()) -@pytest.mark.enable_socket() +@pytest.mark.enable_socket def test_extra_test_iss1541(): url = "https://github.com/py-pdf/pypdf/files/10418158/tst_iss1541.pdf" name = "tst_iss1541.pdf" @@ -943,7 +943,7 @@ def test_extra_test_iss1541(): assert exc.value.args[0] == "Unexpected end of stream" -@pytest.mark.enable_socket() +@pytest.mark.enable_socket def test_fields_returning_stream(): """This problem was reported in #424""" url = "https://github.com/mstamy2/PyPDF2/files/1948267/Simple.form.pdf" @@ -997,7 +997,7 @@ def test_replace_image(tmp_path): pypdf._page.pil_not_imported = False -@pytest.mark.enable_socket() +@pytest.mark.enable_socket def test_inline_images(): """This problem was reported in #424""" url = "https://arxiv.org/pdf/2201.00151.pdf" @@ -1035,7 +1035,7 @@ def test_inline_images(): assert len(reader.pages[0].images) == 3 -@pytest.mark.enable_socket() +@pytest.mark.enable_socket def test_iss(): url = "https://github.com/py-pdf/pypdf/files/11801077/lv2018tconv.pdf" name = "lv2018tconv.pdf" @@ -1045,7 +1045,7 @@ def test_iss(): page.extract_text() -@pytest.mark.enable_socket() +@pytest.mark.enable_socket def test_cr_with_cm_operation(): """Issue #2138""" url = "https://github.com/py-pdf/pypdf/files/12483807/AEO.1172.pdf" @@ -1068,7 +1068,7 @@ def remove_trailing_whitespace(text: str) -> str: return "\n".join(line.rstrip() for line in text.splitlines()) -@pytest.mark.samples() +@pytest.mark.samples @pytest.mark.parametrize( ("pdf_path", "expected_path"), [ @@ -1090,7 +1090,7 @@ def test_text_extraction_layout_mode(pdf_path, expected_path): assert remove_trailing_whitespace(actual) == 
remove_trailing_whitespace(expected) -@pytest.mark.enable_socket() +@pytest.mark.enable_socket def test_layout_mode_space_vertically(): reader = PdfReader(BytesIO(get_data_from_url(name="iss2138.pdf"))) # remove automatically added final newline @@ -1102,7 +1102,7 @@ def test_layout_mode_space_vertically(): ) -@pytest.mark.enable_socket() +@pytest.mark.enable_socket @pytest.mark.parametrize( ("rotation", "strip_rotated"), [(90, True), (180, False), (270, True)] ) @@ -1131,7 +1131,7 @@ def test_text_extraction_invalid_mode(): reader.pages[0].extract_text(extraction_mode="foo") # type: ignore -@pytest.mark.enable_socket() +@pytest.mark.enable_socket def test_get_page_showing_field(): """ Uses testfile from #2452 in order to get fields on multiple pages, @@ -1288,7 +1288,7 @@ def test_get_page_showing_field(): ] == [] -@pytest.mark.enable_socket() +@pytest.mark.enable_socket def test_extract_empty_page(): """Cf #2533""" url = "https://github.com/py-pdf/pypdf/files/14718318/test.pdf" @@ -1297,7 +1297,7 @@ def test_extract_empty_page(): assert reader.pages[1].extract_text(extraction_mode="layout") == "" -@pytest.mark.enable_socket() +@pytest.mark.enable_socket def test_iss2815(): """Cf #2815""" url = "https://github.com/user-attachments/files/16760725/crash-c1920c7a064649e1191d7879952ec252473fc7e6.pdf" diff --git a/tests/test_writer.py b/tests/test_writer.py index 0cd2d03f8..0e9d9a054 100644 --- a/tests/test_writer.py +++ b/tests/test_writer.py @@ -126,6 +126,7 @@ def writer_operate(writer: PdfWriter) -> None: Args: writer: A PdfWriter object + """ pdf_path = RESOURCE_ROOT / "crazyones.pdf" pdf_outline_path = RESOURCE_ROOT / "pdflatex-outline.pdf" @@ -344,7 +345,7 @@ def test_remove_images(pdf_file_path, input_path): assert "Lorem ipsum dolor sit amet" in extracted_text -@pytest.mark.enable_socket() +@pytest.mark.enable_socket def test_remove_images_sub_level(): """Cf #2035""" url = "https://github.com/py-pdf/pypdf/files/12394781/2210.03142-1.pdf" @@ -831,8 +832,8 @@ 
def test_append_pages_from_reader_append(): writer.write(b) -@pytest.mark.enable_socket() -@pytest.mark.slow() +@pytest.mark.enable_socket +@pytest.mark.slow @pytest.mark.filterwarnings("ignore::DeprecationWarning") def test_sweep_indirect_references_nullobject_exception(pdf_file_path): # TODO: Check this more closely... this looks weird @@ -844,8 +845,8 @@ def test_sweep_indirect_references_nullobject_exception(pdf_file_path): merger.write(pdf_file_path) -@pytest.mark.enable_socket() -@pytest.mark.slow() +@pytest.mark.enable_socket +@pytest.mark.slow @pytest.mark.parametrize( ("url", "name"), [ @@ -953,7 +954,7 @@ def test_add_single_annotation(pdf_file_path): writer.write(fp) -@pytest.mark.samples() +@pytest.mark.samples def test_colors_in_outline_item(pdf_file_path): reader = PdfReader(SAMPLE_ROOT / "004-pdflatex-4-pages/pdflatex-4-pages.pdf") writer = PdfWriter() @@ -974,7 +975,7 @@ def test_colors_in_outline_item(pdf_file_path): ] -@pytest.mark.samples() +@pytest.mark.samples def test_write_empty_stream(): reader = PdfReader(SAMPLE_ROOT / "004-pdflatex-4-pages/pdflatex-4-pages.pdf") writer = PdfWriter() @@ -1026,7 +1027,7 @@ def test_startup_dest(): pdf_file_writer.open_destination = None -@pytest.mark.enable_socket() +@pytest.mark.enable_socket def test_iss471(): url = "https://github.com/py-pdf/pypdf/files/9139245/book.pdf" name = "book_471.pdf" @@ -1039,7 +1040,7 @@ def test_iss471(): ) -@pytest.mark.enable_socket() +@pytest.mark.enable_socket def test_reset_translation(): url = "https://corpora.tika.apache.org/base/docs/govdocs1/924/924666.pdf" name = "tika-924666.pdf" @@ -1077,7 +1078,7 @@ def test_threads_empty(): assert thr == thr2 -@pytest.mark.enable_socket() +@pytest.mark.enable_socket def test_append_without_annots_and_articles(): url = "https://corpora.tika.apache.org/base/docs/govdocs1/924/924666.pdf" name = "tika-924666.pdf" @@ -1096,7 +1097,7 @@ def test_append_without_annots_and_articles(): assert len(writer.threads) >= 1 
-@pytest.mark.enable_socket() +@pytest.mark.enable_socket def test_append_multiple(): url = "https://corpora.tika.apache.org/base/docs/govdocs1/924/924666.pdf" name = "tika-924666.pdf" @@ -1111,7 +1112,7 @@ def test_append_multiple(): assert pages[-1] not in pages[0:-1] # page not repeated -@pytest.mark.samples() +@pytest.mark.samples def test_set_page_label(pdf_file_path): src = RESOURCE_ROOT / "GeoBase_NHNC1_Data_Model_UML_EN.pdf" # File without labels reader = PdfReader(src) @@ -1246,7 +1247,7 @@ def test_set_page_label(pdf_file_path): writer.write(pdf_file_path) -@pytest.mark.enable_socket() +@pytest.mark.enable_socket def test_iss1601(): url = "https://github.com/py-pdf/pypdf/files/10579503/badges-38.pdf" name = "badge-38.pdf" @@ -1315,7 +1316,7 @@ def test_attachments(): assert reader.attachments["foobar2.txt"][1] == b"2nd_foobarcontent" -@pytest.mark.enable_socket() +@pytest.mark.enable_socket def test_iss1614(): # test of an annotation(link) directly stored in the /Annots in the page url = "https://github.com/py-pdf/pypdf/files/10669995/broke.pdf" @@ -1330,7 +1331,7 @@ def test_iss1614(): writer.append(reader) -@pytest.mark.enable_socket() +@pytest.mark.enable_socket def test_new_removes(): # test of an annotation(link) directly stored in the /Annots in the page url = "https://github.com/py-pdf/pypdf/files/10807951/tt.pdf" @@ -1384,7 +1385,7 @@ def test_new_removes(): writer.remove_annotations("/Text") -@pytest.mark.enable_socket() +@pytest.mark.enable_socket def test_late_iss1654(): url = "https://github.com/py-pdf/pypdf/files/10935632/bid1.pdf" name = "bid1.pdf" @@ -1397,7 +1398,7 @@ def test_late_iss1654(): writer.write(b) -@pytest.mark.enable_socket() +@pytest.mark.enable_socket def test_iss1723(): # test of an annotation(link) directly stored in the /Annots in the page url = "https://github.com/py-pdf/pypdf/files/11015242/inputFile.pdf" @@ -1407,7 +1408,7 @@ def test_iss1723(): writer.append(reader, (3, 5)) -@pytest.mark.enable_socket() 
+@pytest.mark.enable_socket def test_iss1767(): # test with a pdf which is buggy because the object 389,0 exists 3 times: # twice to define catalog and one as an XObject inducing a loop when @@ -1418,7 +1419,7 @@ def test_iss1767(): PdfWriter(clone_from=reader) -@pytest.mark.enable_socket() +@pytest.mark.enable_socket def test_named_dest_page_number(): """ Closes iss471 @@ -1515,7 +1516,7 @@ def test_update_form_fields(tmp_path): Path(write_data_here).unlink() -@pytest.mark.enable_socket() +@pytest.mark.enable_socket def test_update_form_fields2(): myFiles = { "test1": { @@ -1596,7 +1597,7 @@ def test_update_form_fields2(): } -@pytest.mark.enable_socket() +@pytest.mark.enable_socket def test_iss1862(): # The file here has "/B" entry to define the font in a object below the page # The excluded field shall be considered only at first level (page) and not @@ -1623,7 +1624,7 @@ def test_empty_objects_before_cloning(): assert len(writer._objects) == nb_obj_reader -@pytest.mark.enable_socket() +@pytest.mark.enable_socket def test_watermark(): url = "https://github.com/py-pdf/pypdf/files/11985889/bg.pdf" name = "bgwatermark.pdf" @@ -1642,7 +1643,7 @@ def test_watermark(): assert len(b.getvalue()) < 2.1 * 1024 * 1024 -@pytest.mark.enable_socket() +@pytest.mark.enable_socket @pytest.mark.timeout(4) def test_watermarking_speed(): url = "https://github.com/py-pdf/pypdf/files/11985889/bg.pdf" @@ -1659,7 +1660,7 @@ def test_watermarking_speed(): assert pdf_size_in_mib < 20 -@pytest.mark.enable_socket() +@pytest.mark.enable_socket @pytest.mark.skipif(GHOSTSCRIPT_BINARY is None, reason="Requires Ghostscript") def test_watermark_rendering(tmp_path): """Ensure the visual appearance of watermarking stays correct.""" @@ -1684,7 +1685,7 @@ def test_watermark_rendering(tmp_path): # False positive: https://github.com/PyCQA/bandit/issues/333 subprocess.run( - [ # noqa: S603 + [ GHOSTSCRIPT_BINARY, "-sDEVICE=pngalpha", "-o", @@ -1721,7 +1722,7 @@ def 
test_watermarking_reportlab_rendering(tmp_path): writer.write(pdf_path) # False positive: https://github.com/PyCQA/bandit/issues/333 subprocess.run( - [ # noqa: S603 + [ GHOSTSCRIPT_BINARY, "-r120", "-sDEVICE=pngalpha", @@ -1734,7 +1735,7 @@ def test_watermarking_reportlab_rendering(tmp_path): assert image_similarity(png_path, target_png_path) >= 0.999 -@pytest.mark.enable_socket() +@pytest.mark.enable_socket def test_da_missing_in_annot(): url = "https://github.com/py-pdf/pypdf/files/12136285/Building.Division.Permit.Application.pdf" name = "BuildingDivisionPermitApplication.pdf" @@ -1817,7 +1818,7 @@ def test_missing_info(): assert b"/Info" not in b.getvalue() -@pytest.mark.enable_socket() +@pytest.mark.enable_socket def test_germanfields(): """Cf #2035""" url = "https://github.com/py-pdf/pypdf/files/12194195/test.pdf" @@ -1840,7 +1841,7 @@ def test_germanfields(): ) -@pytest.mark.enable_socket() +@pytest.mark.enable_socket def test_no_t_in_articles(): """Cf #2078""" url = "https://github.com/py-pdf/pypdf/files/12311735/bad.pdf" @@ -1850,7 +1851,7 @@ def test_no_t_in_articles(): writer.append(reader) -@pytest.mark.enable_socket() +@pytest.mark.enable_socket def test_no_i_in_articles(): """Cf #2089""" url = "https://github.com/py-pdf/pypdf/files/12352793/kim2002.pdf" @@ -1860,7 +1861,7 @@ def test_no_i_in_articles(): writer.append(reader) -@pytest.mark.enable_socket() +@pytest.mark.enable_socket def test_damaged_pdf_length_returning_none(): """ Cf #140 @@ -1873,7 +1874,7 @@ def test_damaged_pdf_length_returning_none(): writer.append(reader) -@pytest.mark.enable_socket() +@pytest.mark.enable_socket def test_viewerpreferences(): """Add Tests for ViewerPreferences""" url = "https://github.com/py-pdf/pypdf/files/9175966/2015._pb_decode_pg0.pdf" @@ -1956,7 +1957,7 @@ def test_extra_spaces_in_da_text(caplog): assert b"(abcd)" in t -@pytest.mark.enable_socket() +@pytest.mark.enable_socket def test_object_contains_indirect_reference_to_self(): url = 
"https://github.com/py-pdf/pypdf/files/12389243/testbook.pdf" name = "iss2102.pdf" @@ -1997,7 +1998,7 @@ def test_remove_image_per_type(): assert len(writer.pages[0]["/Resources"]["/XObject"]) == 0 -@pytest.mark.enable_socket() +@pytest.mark.enable_socket def test_add_outlines_on_empty_dict(): """Cf #2233""" @@ -2143,7 +2144,7 @@ def create_number_pdf(n) -> BytesIO: assert "Cannot find page in pages" in caplog.text -@pytest.mark.enable_socket() +@pytest.mark.enable_socket def test_reattach_fields(): """ Test Reattach function @@ -2243,7 +2244,7 @@ def test_init_without_named_arg(): assert len(writer._objects) == nb -@pytest.mark.enable_socket() +@pytest.mark.enable_socket def test_i_in_choice_fields(): """Cf #2611""" url = "https://github.com/py-pdf/pypdf/files/15176321/FRA.F.6180.150.pdf" @@ -2279,7 +2280,7 @@ def test_selfont(): ) -@pytest.mark.enable_socket() +@pytest.mark.enable_socket def test_no_ressource_for_14_std_fonts(caplog): """Cf #2670""" url = "https://github.com/py-pdf/pypdf/files/15405390/f1040.pdf" @@ -2295,7 +2296,7 @@ def test_no_ressource_for_14_std_fonts(caplog): assert "Font dictionary for /Helvetica not found." 
in caplog.text -@pytest.mark.enable_socket() +@pytest.mark.enable_socket def test_field_box_upside_down(): """Cf #2724""" url = "https://github.com/user-attachments/files/15996356/FRA.F.6180.55.pdf" @@ -2312,7 +2313,7 @@ def test_field_box_upside_down(): assert box[3] > 0 -@pytest.mark.enable_socket() +@pytest.mark.enable_socket def test_matrix_entry_in_field_annots(): """Cf #2731""" url = "https://github.com/user-attachments/files/16036514/template.pdf" @@ -2326,7 +2327,7 @@ def test_matrix_entry_in_field_annots(): assert "/Matrix" in writer.pages[0]["/Annots"][5].get_object()["/AP"]["/N"] -@pytest.mark.enable_socket() +@pytest.mark.enable_socket def test_compress_identical_objects(): """Cf #2728 and #2794""" url = "https://github.com/user-attachments/files/16575458/tt2.pdf" @@ -2376,7 +2377,7 @@ def test_utf16_metadata(): ) -@pytest.mark.enable_socket() +@pytest.mark.enable_socket def test_increment_writer(caplog): """Tests for #2811""" writer = PdfWriter( @@ -2470,7 +2471,7 @@ def test_increment_writer(caplog): writer.write(b) -@pytest.mark.enable_socket() +@pytest.mark.enable_socket def test_append_pdf_with_dest_without_page(caplog): """Tests for #2842""" url = "https://github.com/user-attachments/files/16990834/test.pdf" diff --git a/tests/test_xmp.py b/tests/test_xmp.py index 6615b93c8..a64289c06 100644 --- a/tests/test_xmp.py +++ b/tests/test_xmp.py @@ -18,7 +18,7 @@ SAMPLE_ROOT = Path(PROJECT_ROOT) / "sample-files" -@pytest.mark.samples() +@pytest.mark.samples @pytest.mark.parametrize( "src", [ @@ -142,7 +142,7 @@ def test_identity_function(x): assert pypdf.xmp._identity(x) == x -@pytest.mark.enable_socket() +@pytest.mark.enable_socket @pytest.mark.parametrize( ("url", "name", "xmpmm_instance_id"), [ @@ -162,7 +162,7 @@ def test_xmpmm_instance_id(url, name, xmpmm_instance_id): assert xmp_metadata.xmpmm_instance_id == xmpmm_instance_id -@pytest.mark.enable_socket() +@pytest.mark.enable_socket def test_xmp_dc_description_extraction(): """XMP dc_description is 
correctly extracted.""" url = "https://corpora.tika.apache.org/base/docs/govdocs1/953/953770.pdf" @@ -178,7 +178,7 @@ def test_xmp_dc_description_extraction(): } -@pytest.mark.enable_socket() +@pytest.mark.enable_socket def test_dc_creator_extraction(): """XMP dc_creator is correctly extracted.""" url = "https://corpora.tika.apache.org/base/docs/govdocs1/953/953770.pdf" @@ -190,7 +190,7 @@ def test_dc_creator_extraction(): assert xmp_metadata.dc_creator == ["U.S. Fish and Wildlife Service"] -@pytest.mark.enable_socket() +@pytest.mark.enable_socket def test_custom_properties_extraction(): """XMP custom_properties is correctly extracted.""" url = "https://corpora.tika.apache.org/base/docs/govdocs1/986/986065.pdf" @@ -202,7 +202,7 @@ def test_custom_properties_extraction(): assert xmp_metadata.custom_properties == {"Style": "Searchable Image (Exact)"} -@pytest.mark.enable_socket() +@pytest.mark.enable_socket def test_dc_subject_extraction(): """XMP dc_subject is correctly extracted.""" url = "https://corpora.tika.apache.org/base/docs/govdocs1/959/959519.pdf" @@ -234,7 +234,7 @@ def test_dc_subject_extraction(): ] -@pytest.mark.enable_socket() +@pytest.mark.enable_socket def test_invalid_xmp_information_handling(): """ Invalid XML in xmp_metadata is gracefully handled. diff --git a/tests/test_xobject_image_helpers.py b/tests/test_xobject_image_helpers.py index 6e1843585..fb899f446 100644 --- a/tests/test_xobject_image_helpers.py +++ b/tests/test_xobject_image_helpers.py @@ -11,7 +11,7 @@ from . 
import get_data_from_url -@pytest.mark.enable_socket() +@pytest.mark.enable_socket def test_get_imagemode_recursion_depth(): """Avoid infinite recursion for nested color spaces.""" url = "https://github.com/py-pdf/pypdf/files/12814018/out1.pdf" From 6fe6be141231f618d2c8ab15092723ddfe481d2b Mon Sep 17 00:00:00 2001 From: stefan6419846 <96178532+stefan6419846@users.noreply.github.com> Date: Tue, 22 Oct 2024 20:28:45 +0200 Subject: [PATCH 4/8] fix remaining ruff violations --- pypdf/_text_extraction/_layout_mode/_text_state_manager.py | 2 +- pypdf/_writer.py | 2 +- tests/test_filters.py | 2 +- tests/test_workflows.py | 2 +- tests/test_writer.py | 4 ++-- 5 files changed, 6 insertions(+), 6 deletions(-) diff --git a/pypdf/_text_extraction/_layout_mode/_text_state_manager.py b/pypdf/_text_extraction/_layout_mode/_text_state_manager.py index 8851e1a94..1af77fbb5 100644 --- a/pypdf/_text_extraction/_layout_mode/_text_state_manager.py +++ b/pypdf/_text_extraction/_layout_mode/_text_state_manager.py @@ -106,7 +106,7 @@ def text_state_params(self, value: Union[bytes, str] = "") -> TextStateParams: except (UnicodeEncodeError, UnicodeDecodeError): txt = value.decode("utf-8", "replace") txt = "".join( - self.font.char_map[x] if x in self.font.char_map else x for x in txt + self.font.char_map.get(x, x) for x in txt ) else: txt = value diff --git a/pypdf/_writer.py b/pypdf/_writer.py index 01a0ea6cb..223464be0 100644 --- a/pypdf/_writer.py +++ b/pypdf/_writer.py @@ -2930,7 +2930,7 @@ def add_filtered_articles( thr = thr.get_object() if thr.indirect_reference.idnum not in self._id_translated[ id(reader) - ] and fltr.search((thr["/I"] if "/I" in thr else {}).get("/Title", "")): + ] and fltr.search((thr.get("/I", {})).get("/Title", "")): self._add_articles_thread(thr, pages, reader) def _get_cloned_page( diff --git a/tests/test_filters.py b/tests/test_filters.py index 4e87d9c60..23b90cca8 100644 --- a/tests/test_filters.py +++ b/tests/test_filters.py @@ -278,7 +278,7 @@ def 
test_image_without_pillow(tmp_path): env["PYTHONPATH"] = "." + os.pathsep + env["PYTHONPATH"] except KeyError: env["PYTHONPATH"] = "." - result = subprocess.run( + result = subprocess.run( # noqa: S603 # We have the control here. [shutil.which("python"), source_file], capture_output=True, env=env, diff --git a/tests/test_workflows.py b/tests/test_workflows.py index daaf84f05..4b7026d58 100644 --- a/tests/test_workflows.py +++ b/tests/test_workflows.py @@ -1025,7 +1025,7 @@ def test_inline_images(): _a = {} for x, y in reader.pages[2].images[0:-2].items(): - _a[x] = y + _a[x] = y # noqa: PERF403 # Testing code and easier to read this way. with pytest.raises(KeyError) as exc: reader.pages[2]._get_image(("test",)) diff --git a/tests/test_writer.py b/tests/test_writer.py index 0e9d9a054..672f2378a 100644 --- a/tests/test_writer.py +++ b/tests/test_writer.py @@ -1684,7 +1684,7 @@ def test_watermark_rendering(tmp_path): writer.write(pdf_path) # False positive: https://github.com/PyCQA/bandit/issues/333 - subprocess.run( + subprocess.run( # noqa: S603 [ GHOSTSCRIPT_BINARY, "-sDEVICE=pngalpha", @@ -1721,7 +1721,7 @@ def test_watermarking_reportlab_rendering(tmp_path): writer.write(pdf_path) # False positive: https://github.com/PyCQA/bandit/issues/333 - subprocess.run( + subprocess.run( # noqa: S603 [ GHOSTSCRIPT_BINARY, "-r120", From 514edb421c7b9af0a5e9a10fa1314994e3690dad Mon Sep 17 00:00:00 2001 From: stefan6419846 <96178532+stefan6419846@users.noreply.github.com> Date: Tue, 22 Oct 2024 20:33:30 +0200 Subject: [PATCH 5/8] fix mypy issues --- pypdf/_writer.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pypdf/_writer.py b/pypdf/_writer.py index 223464be0..3d1fac029 100644 --- a/pypdf/_writer.py +++ b/pypdf/_writer.py @@ -1912,9 +1912,9 @@ def add_outline_item( page_ref: Union[None, NullObject, IndirectObject, NumberObject] if isinstance(italic, Fit): # it means that we are on the old params if fit is not None and page_number is None: - 
page_number = fit # type: ignore + page_number = fit return self.add_outline_item( - title, page_number, parent, None, before, color, bold, italic, is_open=is_open # type: ignore + title, page_number, parent, None, before, color, bold, italic, is_open=is_open ) if page_number is None: action_ref = None From f728918cf02f9035f4be8a0a97e47edf39b860c2 Mon Sep 17 00:00:00 2001 From: stefan6419846 <96178532+stefan6419846@users.noreply.github.com> Date: Tue, 22 Oct 2024 20:39:53 +0200 Subject: [PATCH 6/8] further mypy fixes --- pypdf/_doc_common.py | 2 +- pypdf/_xobj_image_helpers.py | 4 ++-- pypdf/generic/_fit.py | 6 +++--- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/pypdf/_doc_common.py b/pypdf/_doc_common.py index 3baff2024..4505861f1 100644 --- a/pypdf/_doc_common.py +++ b/pypdf/_doc_common.py @@ -971,7 +971,7 @@ def _build_destination( # create a link to first Page tmp = self.pages[0].indirect_reference indirect_reference = NullObject() if tmp is None else tmp - return Destination(title, indirect_reference, Fit.fit()) # type: ignore + return Destination(title, indirect_reference, Fit.fit()) def _build_outline_item(self, node: DictionaryObject) -> Optional[Destination]: dest, title, outline_item = None, None, None diff --git a/pypdf/_xobj_image_helpers.py b/pypdf/_xobj_image_helpers.py index 5687b3233..7a89525a6 100644 --- a/pypdf/_xobj_image_helpers.py +++ b/pypdf/_xobj_image_helpers.py @@ -229,7 +229,7 @@ def _handle_flate( [ b"".join( [ - colors_arr[1 if img.getpixel((x, y)) > 127 else 0] + colors_arr[1 if img.getpixel((x, y)) > 127 else 0] # type: ignore[operator] for x in range(img.size[0]) ] ) @@ -296,7 +296,7 @@ def _handle_jpx( # we need to convert to the good mode if img1.mode == mode or {img1.mode, mode} == {"L", "P"}: # compare (unordered) sets # L,P are indexed modes which should not be changed. 
- img = img1 + img: Image.Image = img1 elif {img1.mode, mode} == {"RGBA", "CMYK"}: # RGBA / CMYK are 4bytes encoding where # the encoding should be corrected diff --git a/pypdf/generic/_fit.py b/pypdf/generic/_fit.py index e8f2009a2..165484086 100644 --- a/pypdf/generic/_fit.py +++ b/pypdf/generic/_fit.py @@ -1,4 +1,4 @@ -from typing import Any, Optional, Tuple, Union +from typing import Any, List, Optional, Tuple, Union from ._base import is_null_or_none @@ -7,10 +7,10 @@ class Fit: def __init__( self, fit_type: str, fit_args: Tuple[Union[None, float, Any], ...] = () ): - from ._base import FloatObject, NameObject, NullObject + from ._base import FloatObject, NameObject, NullObject, NumberObject self.fit_type = NameObject(fit_type) - self.fit_args = [ + self.fit_args: List[Union[NullObject, FloatObject, NumberObject]] = [ NullObject() if is_null_or_none(a) else FloatObject(a) for a in fit_args ] From 69d526f41b28bae42f01b4e18eb8da4d6b8fe10c Mon Sep 17 00:00:00 2001 From: stefan6419846 <96178532+stefan6419846@users.noreply.github.com> Date: Tue, 22 Oct 2024 20:41:54 +0200 Subject: [PATCH 7/8] revert some mypy changes due to too old types-pillow package --- pypdf/_xobj_image_helpers.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pypdf/_xobj_image_helpers.py b/pypdf/_xobj_image_helpers.py index 7a89525a6..5687b3233 100644 --- a/pypdf/_xobj_image_helpers.py +++ b/pypdf/_xobj_image_helpers.py @@ -229,7 +229,7 @@ def _handle_flate( [ b"".join( [ - colors_arr[1 if img.getpixel((x, y)) > 127 else 0] # type: ignore[operator] + colors_arr[1 if img.getpixel((x, y)) > 127 else 0] for x in range(img.size[0]) ] ) @@ -296,7 +296,7 @@ def _handle_jpx( # we need to convert to the good mode if img1.mode == mode or {img1.mode, mode} == {"L", "P"}: # compare (unordered) sets # L,P are indexed modes which should not be changed. 
- img: Image.Image = img1 + img = img1 elif {img1.mode, mode} == {"RGBA", "CMYK"}: # RGBA / CMYK are 4bytes encoding where # the encoding should be corrected From 54afba84f28664c4b134f2a3d537a8b7935f6413 Mon Sep 17 00:00:00 2001 From: stefan6419846 <96178532+stefan6419846@users.noreply.github.com> Date: Wed, 23 Oct 2024 07:17:13 +0200 Subject: [PATCH 8/8] update mypy to version 1.13.0 --- requirements/ci-3.11.txt | 2 +- requirements/ci.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements/ci-3.11.txt b/requirements/ci-3.11.txt index d8ff0d550..c920354ec 100644 --- a/requirements/ci-3.11.txt +++ b/requirements/ci-3.11.txt @@ -24,7 +24,7 @@ fpdf2==2.8.1 # via -r requirements/ci.in iniconfig==2.0.0 # via pytest -mypy==1.12.1 +mypy==1.13.0 # via -r requirements/ci.in mypy-extensions==1.0.0 # via mypy diff --git a/requirements/ci.txt b/requirements/ci.txt index a65985129..f9579c2ce 100644 --- a/requirements/ci.txt +++ b/requirements/ci.txt @@ -26,7 +26,7 @@ importlib-metadata==8.5.0 # via typeguard iniconfig==2.0.0 # via pytest -mypy==1.12.1 +mypy==1.13.0 # via -r requirements/ci.in mypy-extensions==1.0.0 # via mypy