diff --git a/.github/workflows/github-ci.yaml b/.github/workflows/github-ci.yaml index 01c46cf9f..fe7ff74d2 100644 --- a/.github/workflows/github-ci.yaml +++ b/.github/workflows/github-ci.yaml @@ -170,7 +170,7 @@ jobs: - name: Test docs build run: | pip install -r requirements/docs.txt - sphinx-build -n -W --keep-going -T -b html docs build/sphinx/html + sphinx-build --nitpicky --fail-on-warning --keep-going --show-traceback --builder html docs build/sphinx/html package: name: Build & verify package diff --git a/docs/conf.py b/docs/conf.py index 897472598..82672f35d 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -42,7 +42,6 @@ # If your documentation needs a minimal Sphinx version, state it here. needs_sphinx = "4.0.0" -myst_all_links_external = True # Add any Sphinx extension module names here, as strings. They can be # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. @@ -91,6 +90,10 @@ # This pattern also affects html_static_path and html_extra_path. exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"] +# Configure MyST extension. +myst_all_links_external = False +myst_heading_anchors = 3 + # -- Options for HTML output ------------------------------------------------- @@ -106,7 +109,6 @@ "canonical_url": "", "analytics_id": "", "logo_only": True, - "display_version": True, "prev_next_buttons_location": "bottom", "style_external_links": False, # Toc options diff --git a/docs/dev/documentation.md b/docs/dev/documentation.md index 70cf81297..5a0b06fa4 100644 --- a/docs/dev/documentation.md +++ b/docs/dev/documentation.md @@ -53,4 +53,4 @@ The title of the PR will be used as the first line of that combined commit messa The first comment within the commit will be used as the message body. -See [developer intro](intro.html#commit-messages) for more details. +See [developer intro](intro.md#commit-messages) for more details. 
diff --git a/docs/dev/intro.md b/docs/dev/intro.md index 8b84a8a00..cd46ff4be 100644 --- a/docs/dev/intro.md +++ b/docs/dev/intro.md @@ -11,7 +11,7 @@ pip install -r requirements/dev.txt ## Running Tests -See [testing pypdf with pytest](testing.md) +See [testing pypdf with pytest](testing.md). ## The sample-files git submodule The reason for having the submodule `sample-files` is that we want to keep diff --git a/docs/dev/pypdf-parsing.md b/docs/dev/pypdf-parsing.md index 7fa2c6f95..a41b9afda 100644 --- a/docs/dev/pypdf-parsing.md +++ b/docs/dev/pypdf-parsing.md @@ -1,6 +1,6 @@ # How pypdf parses PDF files -pypdf uses {py:class}`PdfReader <pypdf.PdfReader>` to parse PDF files. +pypdf uses {class}`~pypdf.PdfReader` to parse PDF files. The method {py:meth}`PdfReader.read <pypdf.PdfReader.read>` shows the basic structure of parsing: @@ -19,8 +19,8 @@ structure of parsing: content streams, which are sequences of PDF operators and operands. pypdf decodes these content streams by applying filters (e.g., `FlateDecode`, `LZWDecode`) specified in the stream's dictionary. This is only done when the - object is requested by {py:meth}`PdfReader.get_object - <pypdf.PdfReader.get_object>` which uses the `PdfReader._get_object_from_stream` method. + object is requested by {py:meth}`PdfReader.get_object <pypdf.PdfReader.get_object>` + which uses the `PdfReader._get_object_from_stream` method. ## References diff --git a/docs/meta/scope-of-pypdf.md b/docs/meta/scope-of-pypdf.md index 1420bc2f6..3d7d76217 100644 --- a/docs/meta/scope-of-pypdf.md +++ b/docs/meta/scope-of-pypdf.md @@ -62,7 +62,7 @@ Out of scope for the moment, but might be added if there are enough contributors [`reportlab`](https://pypi.org/project/reportlab/) / [`fpdf2`](https://pypi.org/project/fpdf2/) or document conversion tools like [`pdfkit`](https://pypi.org/project/pdfkit/). -* **Replacing words within a PDF**: [Extracting text from PDF is hard](https://pypdf.readthedocs.io/en/stable/user/extract-text.html#why-text-extraction-is-hard).
+* **Replacing words within a PDF**: [Extracting text from PDF is hard](../user/extract-text.md#why-text-extraction-is-hard). Replacing text in a reliable way is even harder. For example, one word might be split into multiple tokens. Hence it's not a simple "search and replace" in some cases. diff --git a/docs/modules/PageObject.rst b/docs/modules/PageObject.rst index b4524b443..614606a94 100644 --- a/docs/modules/PageObject.rst +++ b/docs/modules/PageObject.rst @@ -15,3 +15,5 @@ The PageObject Class :members: :inherited-members: File :undoc-members: + +.. autofunction:: pypdf.mult diff --git a/docs/user/add-javascript.md b/docs/user/add-javascript.md index 02951fb19..8f48397f6 100644 --- a/docs/user/add-javascript.md +++ b/docs/user/add-javascript.md @@ -3,7 +3,7 @@ PDF readers vary in the extent they support JavaScript, with some not supporting it at all. Adobe has documentation on its support here: -https://opensource.adobe.com/dc-acrobat-sdk-docs/library/jsapiref/index.html +[https://opensource.adobe.com/dc-acrobat-sdk-docs/library/jsapiref/index.html](https://opensource.adobe.com/dc-acrobat-sdk-docs/library/jsapiref/index.html) ## Launch print window on opening diff --git a/docs/user/add-watermark.md b/docs/user/add-watermark.md index 237cf5f68..9573b9d8c 100644 --- a/docs/user/add-watermark.md +++ b/docs/user/add-watermark.md @@ -8,7 +8,7 @@ background of the document. The process of stamping and watermarking is the same, you just need to set `over` parameter to `True` for stamping and `False` for watermarking. -You can use `merge_page()` if you don't need to transform the stamp: +You can use {func}`~pypdf._page.PageObject.merge_page` if you don't need to transform the stamp: ```python from pypdf import PdfReader, PdfWriter @@ -21,7 +21,7 @@ for page in writer.pages: writer.write("out.pdf") ``` -Otherwise use `merge_transformed_page()` with `Transformation()` if you need to translate, rotate, scale, etc. the stamp before merging it to the content page. 
+Otherwise use {func}`~pypdf._page.PageObject.merge_transformed_page` with {class}`~pypdf.Transformation` if you need to translate, rotate, scale, etc. the stamp before merging it to the content page. ```python from pathlib import Path @@ -56,7 +56,7 @@ stamp("example.pdf", "stamp.pdf", "out.pdf") ``` If you are experiencing wrongly rotated watermarks/stamps, try to use -`transfer_rotation_to_content()` on the corresponding pages beforehand +{func}`~pypdf._page.PageObject.transfer_rotation_to_content` on the corresponding pages beforehand to fix the page boxes. Example of stamp: diff --git a/docs/user/adding-pdf-annotations.md b/docs/user/adding-pdf-annotations.md index bdececf51..93b8249a4 100644 --- a/docs/user/adding-pdf-annotations.md +++ b/docs/user/adding-pdf-annotations.md @@ -22,7 +22,7 @@ If you want to add text in a box like this ![](free-text-annotation.png) -you can use the {py:class}`FreeText <pypdf.annotations.FreeText>`: +you can use {class}`~pypdf.annotations.FreeText`: ```python from pypdf import PdfReader, PdfWriter @@ -71,7 +71,7 @@ If you want to add a line like this: ![](annotation-line.png) -you can use {py:class}`Line <pypdf.annotations.Line>`: +you can use {class}`~pypdf.annotations.Line`: ```python from pypdf import PdfReader, PdfWriter @@ -103,7 +103,7 @@ If you want to add a line like this: ![](annotation-polyline.png) -you can use {py:class}`PolyLine <pypdf.annotations.PolyLine>`: +you can use {class}`~pypdf.annotations.PolyLine`: ```python from pypdf import PdfReader, PdfWriter @@ -132,7 +132,7 @@ If you want to add a rectangle like this: ![](annotation-square.png) -you can use {py:class}`Rectangle <pypdf.annotations.Rectangle>`: +you can use {class}`~pypdf.annotations.Rectangle`: ```python from pypdf import PdfReader, PdfWriter @@ -166,7 +166,7 @@ If you want to add a circle like this: ![](annotation-circle.png) -you can use {py:class}`Ellipse <pypdf.annotations.Ellipse>`: +you can use {class}`~pypdf.annotations.Ellipse`: ```python from pypdf import PdfReader, PdfWriter @@ -195,7 +195,7 @@ If you want to add a polygon like this: ![](annotation-polygon.png) -you can use 
{py:class}`Polygon <pypdf.annotations.Polygon>`: +you can use {class}`~pypdf.annotations.Polygon`: ```python from pypdf import PdfReader, PdfWriter @@ -224,7 +224,7 @@ Manage the Popup windows for markups, looks like this: ![](annotation-popup.png) -you can use the {py:class}`Popup <pypdf.annotations.Popup>`: +you can use {py:class}`~pypdf.annotations.Popup`: ```python from pypdf.annotations import Popup, Text @@ -257,8 +257,7 @@ the parent annotation with which this popup annotation shall be associated. ## Link -If you want to add a link, you can use -{py:class}`Link <pypdf.annotations.Link>`: +If you want to add a link, you can use {class}`~pypdf.annotations.Link`: ```python from pypdf import PdfReader, PdfWriter @@ -321,7 +320,7 @@ If you want to highlight text like this: ![](annotation-highlight.png) -you can use the {py:class}`Highlight <pypdf.annotations.Highlight>`: +you can use {class}`~pypdf.annotations.Highlight`: ```python from pypdf import PdfReader, PdfWriter diff --git a/docs/user/cropping-and-transforming.md b/docs/user/cropping-and-transforming.md index d3c23333e..592878120 100644 --- a/docs/user/cropping-and-transforming.md +++ b/docs/user/cropping-and-transforming.md @@ -1,8 +1,10 @@ # Cropping and Transforming PDFs -> **Notice**: Just because content is no longer visible, it is not gone. -> Cropping works by adjusting the viewbox. That means content that was cropped -> away can still be restored. +```{note} +Just because content is no longer visible, it is not gone. +Cropping works by adjusting the viewbox. That means content that was cropped +away can still be restored. +``` ```python from pypdf import PdfReader, PdfWriter @@ -33,8 +35,7 @@ with open("pypdf-output.pdf", "wb") as fp: The most typical rotation is a clockwise rotation of the page by multiples of 90 degrees. That is done when the orientation of the page is wrong. 
You can -do that with the [`rotate` method](https://pypdf.readthedocs.io/en/latest/modules/PageObject.html#pypdf._page.PageObject.rotate) -of the `PageObject` class: +do that with the {func}`~pypdf._page.PageObject.rotate` method: ```python from pypdf import PdfReader, PdfWriter @@ -50,8 +51,8 @@ with open("output.pdf", "wb") as fp: ``` The rotate method is typically preferred over the `page.add_transformation(Transformation().rotate())` -method, because `rotate` will ensure that the page is still in the mediabox / -cropbox. The transformation object operates on the coordinates of the pages +method, because `rotate` will ensure that the page is still in the mediabox/cropbox. +The transformation object operates on the coordinates of the pages contents and does not change the mediabox or cropbox. @@ -193,7 +194,6 @@ writer.write("out-pg-transform.pdf") ### pypdf._page.MERGE_CROP_BOX `pypdf<=3.4.0` used to merge the other page with `trimbox`. - `pypdf>3.4.0` changes this behavior to `cropbox`. In case anybody has good reasons to use/expect `trimbox`, please let me know via @@ -210,7 +210,7 @@ We have designed the following business card (A8 format) to advertise our new st ![](nup-source.png) -We would like to copy this card sixteen times on an A4 page, to print it, cut it, and give it to all our friends. Having learned about the ``merge_page()`` method and the ``Transformation`` class, we run the following code. Notice that we had to tweak the media box of the source page to extend it, which is already a dirty hack (in this case). +We would like to copy this card sixteen times on an A4 page, to print it, cut it, and give it to all our friends. Having learned about the {func}`~pypdf._page.PageObject.merge_page` method and the {class}`~pypdf.Transformation` class, we run the following code. Notice that we had to tweak the media box of the source page to extend it, which is already a dirty hack (in this case). 
```python from pypdf import PaperSize, PdfReader, PdfWriter, Transformation @@ -248,9 +248,9 @@ And the result is… unexpected. ![](nup-dest1.png) -The problem is that, having run ``add.transformation()`` several times on the *same* source page, those transformations add up: for instance, the sixteen transformations are applied to the last copy of the source page, so most of the business cards are *outside* the destination page. +The problem is that, having run ``add_transformation()`` several times on the *same* source page, those transformations add up: for instance, the sixteen transformations are applied to the last copy of the source page, so most of the business cards are *outside* the destination page. -We need a way to merge a transformed page, *without* modifying the source page. Here comes ``merge_transformed_page()``. With this method: +We need a way to merge a transformed page, *without* modifying the source page. Here comes {func}`~pypdf._page.PageObject.merge_transformed_page`. With this method: - we no longer need the media box hack of our first try; - transformations are only applied *once*. diff --git a/docs/user/encryption-decryption.md b/docs/user/encryption-decryption.md index 9beda1f2c..f76c42316 100644 --- a/docs/user/encryption-decryption.md +++ b/docs/user/encryption-decryption.md @@ -9,8 +9,10 @@ is the latest PDF standard. We recommend [`pyca/cryptography`](https://cryptography.io/en/latest/). Alternatively, you can use [`pycryptodome`](https://pypi.org/project/pycryptodome/). -> Please see the note in the [installation guide](installation.md) -> for installing the extra dependencies if interacting with PDFs that use AES. +```{note} +Please see the note in the [installation guide](installation.md) +for installing the extra dependencies if interacting with PDFs that use AES. +``` ## Encrypt @@ -32,8 +34,10 @@ with open("encrypted-pdf.pdf", "wb") as f: The algorithm can be one of `RC4-40`, `RC4-128`, `AES-128`, `AES-256-R5`, `AES-256`. 
We recommend using `AES-256-R5`. -> ⚠️ WARNING ⚠️: pypdf uses `RC4` by default for compatibility if you omit the "algorithm" parameter. -> Since `RC4` is insecure, you should use `AES` algorithms. +```{warning} +pypdf uses `RC4` by default for compatibility if you omit the "algorithm" parameter. +Since `RC4` is insecure, you should use `AES` algorithms. +``` ## Decrypt diff --git a/docs/user/extract-images.md b/docs/user/extract-images.md index fa3f6158d..4c4535300 100644 --- a/docs/user/extract-images.md +++ b/docs/user/extract-images.md @@ -1,7 +1,9 @@ # Extract Images -> Please note: In order to use the following code you need to install optional -> dependencies, see [installation guide](installation.md). +```{note} +In order to use the following code you need to install optional +dependencies, see [installation guide](installation.md). +``` Every page of a PDF document can contain an arbitrary amount of images. The names of the files may not be unique. diff --git a/docs/user/extract-text.md b/docs/user/extract-text.md index 107d43ffb..a071c5164 100644 --- a/docs/user/extract-text.md +++ b/docs/user/extract-text.md @@ -30,7 +30,7 @@ print(page.extract_text(extraction_mode="layout", layout_mode_scale_weight=1.0)) print(page.extract_text(extraction_mode="layout", layout_mode_strip_rotated=False)) ``` -Refer to [extract\_text](../modules/PageObject.html#pypdf._page.PageObject.extract_text) for more details. +Refer to {func}`~pypdf._page.PageObject.extract_text` for more details. ## Using a visitor @@ -49,7 +49,7 @@ It is recommended to use the user_matrix as it takes into all transformations. Notes : - As indicated in §8.3.3 of the PDF 1.7 or PDF 2.0 specification, the user matrix applies to text space/image space/form space/pattern space. 
- If you want to get the full transformation from text to user space, you can use the `mult` function (available in global import) as follows: + - If you want to get the full transformation from text to user space, you can use the {func}`~pypdf.mult` function as follows: `txt2user = mult(tm, cm))`. The font size is the raw text size and affected by the `user_matrix`. diff --git a/docs/user/file-size.md b/docs/user/file-size.md index d47ddcc0e..3cf1dfff1 100644 --- a/docs/user/file-size.md +++ b/docs/user/file-size.md @@ -65,7 +65,7 @@ method. It is a lossless compression, meaning the resulting PDF looks exactly the same. Deflate compression can be applied to a page via -[`page.compress_content_streams`](https://pypdf.readthedocs.io/en/latest/modules/PageObject.html#pypdf._page.PageObject.compress_content_streams): +{meth}`page.compress_content_streams <pypdf._page.PageObject.compress_content_streams>`: ```python from pypdf import PdfWriter diff --git a/docs/user/merging-pdfs.md b/docs/user/merging-pdfs.md index df155daa3..4ca835e1e 100644 --- a/docs/user/merging-pdfs.md +++ b/docs/user/merging-pdfs.md @@ -49,7 +49,7 @@ output.close() ## append -`append` has been slightly extended in `PdfWriter`. See [PdfWriter.append](../modules/PdfWriter.html#pypdf.PdfWriter.append) for more details. +`append` has been slightly extended in `PdfWriter`. See {func}`~pypdf.PdfWriter.append` for more details. 
### Examples @@ -136,7 +136,7 @@ new_page = writer.add_page(reader.pages[0], excluded_fields=["/B"]) ### Merging rotated pages -If you are working with rotated pages, you might want to call `transfer_rotation_to_content()` on the page +If you are working with rotated pages, you might want to call {func}`~pypdf._page.PageObject.transfer_rotation_to_content` on the page before merging to avoid wrongly rotated results: ```python diff --git a/docs/user/robustness.md b/docs/user/robustness.md index b63d75d45..69fa5fb69 100644 --- a/docs/user/robustness.md +++ b/docs/user/robustness.md @@ -29,8 +29,8 @@ pypdf gives you the option to be strict or not. pypdf has two core objects: -* [`PdfReader`](../modules/PdfReader.md) -* [`PdfWriter`](../modules/PdfWriter.md) +* {class}`~pypdf.PdfReader` +* {class}`~pypdf.PdfWriter` Only the PdfReader has a `strict` parameter, since presumably you do not want to write a non-conforming PDF. diff --git a/docs/user/viewer-preferences.md b/docs/user/viewer-preferences.md index b43c0feca..f4a58e710 100644 --- a/docs/user/viewer-preferences.md +++ b/docs/user/viewer-preferences.md @@ -4,12 +4,12 @@ It is possible to set viewer preferences of a PDF file. §12.2 of the [PDF 1.7 specification](https://opensource.adobe.com/dc-acrobat-sdk-docs/pdfstandards/PDF32000_2008.pdf). Note that the `/ViewerPreferences` dictionary does not exist by default. -If it is not already present, it must be created by calling the `create_viewer_preferences` method -of a `PdfWriter` object. +If it is not already present, it must be created by calling the +{func}`~pypdf.PdfWriter.create_viewer_preferences` method. -If viewer preferences exist in a PDF file being read with `PdfReader`, -you can access them as properties of `viewer_preferences`. -Otherwise, the `viewer_preferences` property will be set to `None`. 
+If viewer preferences exist in a PDF file being read with {class}`~pypdf.PdfReader`, +you can access them as properties of {attr}`~pypdf.PdfReader.viewer_preferences`. +Otherwise, the {attr}`~pypdf.PdfReader.viewer_preferences` property will be set to `None`. ## Example diff --git a/requirements/docs.in b/requirements/docs.in index 4254003a6..3dd64d0cf 100644 --- a/requirements/docs.in +++ b/requirements/docs.in @@ -1,4 +1,3 @@ sphinx -sphinx_rtd_theme<2.0.0 -myst_parser==0.16.1 -attrs # required for myst, but not automatically installed by myst +sphinx_rtd_theme +myst_parser diff --git a/requirements/docs.txt b/requirements/docs.txt index 6a9c59d62..d2cd8a720 100644 --- a/requirements/docs.txt +++ b/requirements/docs.txt @@ -1,87 +1,75 @@ # -# This file is autogenerated by pip-compile with python 3.7 -# To update, run: +# This file is autogenerated by pip-compile with Python 3.10 +# by the following command: # # pip-compile requirements/docs.in # -alabaster==0.7.13 +alabaster==1.0.0 # via sphinx -attrs==23.1.0 - # via -r requirements/docs.in -babel==2.14.0 +babel==2.16.0 # via sphinx -certifi==2023.11.17 +certifi==2024.8.30 # via requests -charset-normalizer==3.3.2 +charset-normalizer==3.4.0 # via requests -docutils==0.17.1 +docutils==0.21.2 # via # myst-parser # sphinx # sphinx-rtd-theme -idna==3.6 +idna==3.10 # via requests imagesize==1.4.1 # via sphinx -importlib-metadata==6.7.0 - # via - # attrs - # sphinx -jinja2==3.1.2 +jinja2==3.1.4 # via # myst-parser # sphinx -markdown-it-py==2.2.0 +markdown-it-py==3.0.0 # via # mdit-py-plugins # myst-parser -markupsafe==2.1.3 +markupsafe==3.0.1 # via jinja2 -mdit-py-plugins==0.3.5 +mdit-py-plugins==0.4.2 # via myst-parser mdurl==0.1.2 # via markdown-it-py -myst-parser==0.16.1 +myst-parser==4.0.0 # via -r requirements/docs.in -packaging==23.2 +packaging==24.1 # via sphinx -pygments==2.17.2 +pygments==2.18.0 # via sphinx -pytz==2023.3.post1 - # via babel -pyyaml==6.0.1 +pyyaml==6.0.2 # via myst-parser -requests==2.31.0 
+requests==2.32.3 # via sphinx snowballstemmer==2.2.0 # via sphinx -sphinx==4.5.0 +sphinx==8.1.3 # via # -r requirements/docs.in # myst-parser # sphinx-rtd-theme # sphinxcontrib-jquery -sphinx-rtd-theme==1.3.0 +sphinx-rtd-theme==3.0.1 # via -r requirements/docs.in -sphinxcontrib-applehelp==1.0.2 +sphinxcontrib-applehelp==2.0.0 # via sphinx -sphinxcontrib-devhelp==1.0.2 +sphinxcontrib-devhelp==2.0.0 # via sphinx -sphinxcontrib-htmlhelp==2.0.0 +sphinxcontrib-htmlhelp==2.1.0 # via sphinx sphinxcontrib-jquery==4.1 # via sphinx-rtd-theme sphinxcontrib-jsmath==1.0.1 # via sphinx -sphinxcontrib-qthelp==1.0.3 +sphinxcontrib-qthelp==2.0.0 # via sphinx -sphinxcontrib-serializinghtml==1.1.5 +sphinxcontrib-serializinghtml==2.0.0 # via sphinx -typing-extensions==4.7.1 - # via - # importlib-metadata - # markdown-it-py -urllib3==2.0.7 +tomli==2.0.2 + # via sphinx +urllib3==2.2.3 # via requests -zipp==3.15.0 - # via importlib-metadata