Reduces docs build warnings to 0 (flyteorg#921)
reduce docs build warnings to 0

Signed-off-by: Samhita Alla <[email protected]>
samhita-alla authored Nov 15, 2022
1 parent 283b456 commit 3e245af
Showing 48 changed files with 124 additions and 138 deletions.
15 changes: 15 additions & 0 deletions .github/workflows/checks.yml
@@ -145,3 +145,18 @@ jobs:
"tag_name": "'${{ needs.bump-version.outputs.version }}'",
"prerelease": false
}'
  docs_warnings:
    name: Docs Warnings
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3
        with:
          fetch-depth: "0"
      - name: Report Sphinx Warnings
        id: sphinx-warnings
        run: |
          sudo apt-get install python3-sphinx
          cd cookbook
          pip install -r docs-requirements.txt
          cd docs && make html
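The new job builds the docs so Sphinx warnings surface in CI; the reporting step itself is elided in this view. As a rough sketch of the idea (an assumption, not the workflow's actual step), the same check can be written in Python by asking ``sphinx-build`` to write warnings to a file and failing when that file is non-empty:

.. code-block:: python

    import pathlib
    import subprocess
    import sys

    warnings_file = pathlib.Path("warnings.txt")
    # -b html builds the HTML docs; -w writes every warning to the given file
    subprocess.run(
        ["sphinx-build", "-b", "html", "-w", str(warnings_file), "docs", "docs/_build/html"],
        check=True,
    )
    warnings = [line for line in warnings_file.read_text().splitlines() if line.strip()]
    if warnings:
        print(f"{len(warnings)} Sphinx warnings found")
        sys.exit(1)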
@@ -2,8 +2,8 @@
Single Node, Multi GPU Training
--------------------------------
When you need to scale up model training in pytorch, you can use the :py:class:`~pytorch:torch.nn.DataParallel` for
single node, multi-gpu/cpu training or :py:class:`~pytorch:torch.nn.parallel.DistributedDataParallel` for multi-node,
When you need to scale up model training in pytorch, you can use the :py:class:`~torch:torch.nn.DataParallel` for
single node, multi-gpu/cpu training or :py:class:`~torch:torch.nn.parallel.DistributedDataParallel` for multi-node,
multi-gpu training.
This tutorial will cover how to write a simple training script on the MNIST dataset that uses
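As a rough illustration of the difference (a sketch, not code from this example; ``build_model`` and ``rank`` are placeholders), ``DataParallel`` lets one process drive all visible GPUs, while ``DistributedDataParallel`` wraps the model inside each spawned process:

.. code-block:: python

    import torch.nn as nn
    from torch.nn.parallel import DistributedDataParallel as DDP

    model = build_model()              # placeholder: any nn.Module
    dp_model = nn.DataParallel(model)  # single process, all visible GPUs
    ddp_model = DDP(model.to(rank), device_ids=[rank])  # one process per GPU rank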
@@ -37,7 +37,7 @@
# %%
# We'll re-use certain classes and functions from the
# :ref:`single node and gpu tutorial <pytorch_single_node_and_gpu>`
# such as the ``Net`` model architecture, ``Hyperparameters``, and ``log_test_predictions``.
# such as the ``Net`` model architecture, ``Hyperparameters``, and ``log_test_predictions``\.
from mnist_classifier.pytorch_single_node_and_gpu import Hyperparameters, Net, log_test_predictions
from torch import distributed as dist
from torch import multiprocessing as mp
@@ -53,18 +53,18 @@
DATA_DIR = "./data"

# %%
# The following variables are specific to ``wandb``:
# The following variables are specific to ``wandb``\:
#
# - ``NUM_BATCHES_TO_LOG``: Number of batches to log from the test data for each test step
# - ``LOG_IMAGES_PER_BATCH``: Number of images to log per test batch
# - ``NUM_BATCHES_TO_LOG``\: Number of batches to log from the test data for each test step
# - ``LOG_IMAGES_PER_BATCH``\: Number of images to log per test batch
NUM_BATCHES_TO_LOG = 10
LOG_IMAGES_PER_BATCH = 32


# %%
# If running remotely, copy your ``wandb`` API key to the Dockerfile under the environment variable ``WANDB_API_KEY``.
# If running remotely, copy your ``wandb`` API key to the Dockerfile under the environment variable ``WANDB_API_KEY``\.
# This function logs into ``wandb`` and initializes the project. If you built your Docker image with the
# ``WANDB_USERNAME``, this will work. Otherwise, replace ``my-user-name`` with your ``wandb`` user name.
# ``WANDB_USERNAME``\, this will work. Otherwise, replace ``my-user-name`` with your ``wandb`` user name.
#
# We'll call this function in the ``pytorch_mnist_task`` defined below.
def wandb_setup():
@@ -178,7 +178,7 @@ def train(model, rank, train_loader, optimizer, epoch, log_interval):
# Evaluation
# ==========
#
# We define a ``test`` function to test the model on the test dataset, logging ``accuracy``, and ``test_loss`` to a
# We define a ``test`` function to test the model on the test dataset, logging ``accuracy``\, and ``test_loss`` to a
# ``wandb`` `table <https://docs.wandb.ai/guides/data-vis/log-tables>`__, which helps us visualize the model's
# performance in a structured format.
def test(model, rank, test_loader):
@@ -271,8 +271,8 @@ def dist_setup(rank, world_size, backend):


# %%
# Then we define the ``train_mnist`` function. Note the conditionals that check for ``rank == 0``. These parts of the
# functions are only called in the main process, which is the ``0``th rank. The reason for this is that we only want the
# Then we define the ``train_mnist`` function. Note the conditionals that check for ``rank == 0``\. These parts of the
# functions are only called in the main process, which is the ``0``\th rank. The reason for this is that we only want the
# main process to perform certain actions such as:
#
# - log metrics via ``wandb``
@@ -355,12 +355,12 @@ def train_mnist(rank: int, world_size: int, hp: Hyperparameters):


# %%
# The output model using :py:func:`pytorch:torch.save` saves the `state_dict` as described
# The output model using :py:func:`torch:torch.save` saves the `state_dict` as described
# `in pytorch docs <https://pytorch.org/tutorials/beginner/saving_loading_models.html#saving-and-loading-models>`_.
# A common convention is to have the ``.pt`` extension for the model file.
#
# .. note::
# Note the usage of ``requests=Resources(gpu=WORLD_SIZE)``. This will force Flyte to allocate this task onto a
# Note the usage of ``requests=Resources(gpu=WORLD_SIZE)``\. This will force Flyte to allocate this task onto a
# machine with GPU(s), which in our case is 2 gpus. The task will be queued up until a machine with GPU(s) can be
# procured. Also, for the GPU Training to work, the Dockerfile needs to be built as explained in the
# :ref:`pytorch-dockerfile` section.
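# %%
# As a quick illustration (a sketch, not part of this example's task), saving and
# restoring a model through its ``state_dict`` looks roughly like this:
#
# .. code-block:: python
#
#    torch.save(model.state_dict(), "mnist_cnn.pt")     # write weights to a .pt file
#
#    model = Net()                                       # re-create the architecture
#    model.load_state_dict(torch.load("mnist_cnn.pt"))   # restore the weights
#    model.eval()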
@@ -370,11 +370,11 @@ def train_mnist(rank: int, world_size: int, hp: Hyperparameters):
# =====================
#
# Next we define the flyte task that kicks off the distributed training process. Here we call the
# pytorch :py:func:`multiprocessing <pytorch:torch.multiprocessing.spawn>` function to initiate a process on each
# pytorch :py:func:`multiprocessing <torch:torch.multiprocessing.spawn>` function to initiate a process on each
# available GPU. Since we're parallelizing the data, each process will contain a copy of the model and pytorch
# will handle syncing the weights across all processes on ``optimizer.step()`` calls.
#
# Read more about pytorch distributed training `here <https://pytorch.org/tutorials/beginner/dist_overview.html>`_.
# Read more about pytorch distributed training `here <https://pytorch.org/tutorials/beginner/dist_overview.html>`__.
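# %%
# For reference, a bare-bones ``mp.spawn`` call (a sketch, not the task defined
# below; ``hp`` is assumed to be a ``Hyperparameters`` instance) starts
# ``WORLD_SIZE`` processes and passes each one its rank as the first argument:
#
# .. code-block:: python
#
#    WORLD_SIZE = 2
#    mp.spawn(train_mnist, args=(WORLD_SIZE, hp), nprocs=WORLD_SIZE, join=True)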


# %%
@@ -57,7 +57,7 @@


# %%
# We declare ``NamedTuple``s which will be used as signatures of the Flyte task outputs.
# We declare ``NamedTuple``\s which will be used as signatures of the Flyte task outputs.
# The variable names and types correspond to the values of the unpacked tuples returned
# from the corresponding Flyte task.
plotdata = typing.NamedTuple(
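# %%
# As a generic illustration (a sketch, not the ``plotdata`` definition above),
# such an output signature simply pairs names with types:
#
# .. code-block:: python
#
#    result = typing.NamedTuple("result", accuracy=float, model_path=str)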
17 changes: 10 additions & 7 deletions cookbook/case_studies/ml_training/spark_horovod/README.rst
@@ -5,7 +5,7 @@ Forecasting Rossman Store Sales with Horovod and Spark

The problem statement we will be looking at is forecasting sales using `rossmann store sales <https://www.kaggle.com/c/rossmann-store-sales>`__ data.
Our example is an adaptation of the `Horovod-Spark example <https://github.com/horovod/horovod/blob/master/examples/spark/keras/keras_spark_rossmann_estimator.py>`__.
Here’s how the code is streamlined:
Here's how the code is streamlined:

- Fetch the rossmann sales data
- Perform complicated data pre-processing using Flyte-Spark plugin
@@ -17,7 +17,7 @@ About Horovod

Horovod is a distributed deep learning training framework for TensorFlow, Keras, PyTorch, and Apache MXNet.
The goal of Horovod is to make distributed deep learning fast and easy to use.
It uses the all-reduce algorithm for fast distributed training rather than a parameter server approach (`all-reduce vs. parameter server <https://www.run.ai/guides/gpu-deep-learning/distributed-training/#Deep>`__).
It uses the all-reduce algorithm for fast distributed training instead of a parameter server approach.
It builds on top of low-level frameworks like MPI and NCCL and provides optimized algorithms for sharing data between parallel training processes.

.. figure:: https://raw.githubusercontent.com/flyteorg/static-resources/main/flytesnacks/tutorials/horovod/all_reduce.png
@@ -30,7 +30,7 @@ About Spark

Spark is a data processing and analytics engine to deal with large-scale data.

Here’s an interesting fact about Spark integration:
Here's an interesting fact about Spark integration:

**Spark integrates with both Horovod and Flyte**

@@ -42,7 +42,7 @@ It facilitates running distributed jobs on the Spark cluster.
In our example, we use an Estimator API.
An estimator API abstracts the data processing, model training and checkpointing, and distributed training, which makes it easy to integrate and run our example code.
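For orientation, configuring a Horovod ``KerasEstimator`` looks roughly like this (a sketch with placeholder model, columns, and paths — not the estimator built later in this example):

.. code-block:: python

    import horovod.spark.keras as hvd_keras
    from horovod.spark.common.store import Store

    store = Store.create("/tmp/horovod-work")  # intermediate data and checkpoints
    estimator = hvd_keras.KerasEstimator(
        num_proc=2,                 # number of parallel training processes
        store=store,
        model=keras_model,          # placeholder: a compiled tf.keras model
        optimizer=opt,              # placeholder optimizer
        loss="mae",
        feature_cols=["features"],
        label_cols=["sales"],
        batch_size=128,
        epochs=10,
    )
    keras_model = estimator.fit(train_df)  # ``train_df``: a Spark DataFrame (placeholder)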

Since we use the Keras deep learning library, here’s how we install the relevant Horovod packages:
Since we use the Keras deep learning library, here's how we install the relevant Horovod packages:

.. code-block:: bash
@@ -56,7 +56,7 @@ The installation includes enabling MPI and TensorFlow environments.
Flyte and Spark
^^^^^^^^^^^^^^^

Flyte can execute Spark jobs natively on a Kubernetes Cluster, which manages a virtual cluster’s lifecycle, spin-up, and tear down.
Flyte can execute Spark jobs natively on a Kubernetes Cluster, which manages a virtual cluster's lifecycle, spin-up, and tear down.
It leverages the open-sourced Spark On K8s Operator and can be enabled without signing up for any service.
This is like running a transient spark cluster—a type of cluster spun up for a specific Spark job and torn down after completion.

@@ -70,16 +70,19 @@ To install the Spark plugin on Flyte, we use the following command:
:alt: Flyte-Spark plugin

Flyte-Spark plugin

The plugin requires configuring the Flyte backend as well. Refer to :ref:`Kubernetes Spark Jobs <plugins-spark-k8s>` for setup instructions.
In a nutshell, here’s how Horovod-Spark-Flyte can be beneficial:

In a nutshell, here's how Horovod-Spark-Flyte can be beneficial:

Horovod provides the distributed framework, Spark enables extracting, preprocessing, and partitioning data,
Flyte can stitch the former two pieces together, e.g., by connecting the data output of a Spark transform to a training system using Horovod while ensuring high utilization of GPUs!

Run workflows in this directory with the custom-built base image like so:

.. prompt:: bash $

pyflyte run --remote keras_spark_rossmann_estimator.py:horovod_spark_wf --image ghcr.io/flyteorg/flytecookbook:spark_horovod-latest
pyflyte run --remote keras_spark_rossmann_estimator.py:horovod_spark_wf --image ghcr.io/flyteorg/flytecookbook:spark_horovod-latest

.. panels::
:header: text-center
16 changes: 8 additions & 8 deletions cookbook/core/containerization/multi_images.py
@@ -23,14 +23,14 @@
1. Create an account with `Dockerhub <https://hub.docker.com/signup>`__.
2. Build a Docker image using the Dockerfile:
.. code-block::
.. code-block::
docker build . -f ./<dockerfile-folder>/<dockerfile-name> -t <your-name>/<docker-image-name>:<version>
docker build . -f ./<dockerfile-folder>/<dockerfile-name> -t <your-name>/<docker-image-name>:<version>
3. Once the Docker image is built, login to your Dockerhub account from the CLI:
.. code-block::
.. code-block::
docker login
docker login
4. It prompts you to enter the username and the password.
5. Push the Docker image to Dockerhub:
@@ -42,13 +42,13 @@
.. code-block::
docker build -f ./path-to-dockerfile/Dockerfile.prediction -t username/multi-images-prediction:latest
docker login
docker push dockerhub_name/multi-images-prediction
docker build -f ./path-to-dockerfile/Dockerfile.prediction -t username/multi-images-prediction:latest
docker login
docker push dockerhub_name/multi-images-prediction
.. tip::
Sometimes, ``docker login`` may not be successful. In such cases, execute ``docker logout`` and ``docker login``.
Sometimes, ``docker login`` may not be successful. In such cases, execute ``docker logout`` and ``docker login``.
Let's dive into the example.
"""
4 changes: 2 additions & 2 deletions cookbook/core/control_flow/chain_entities.py
@@ -95,8 +95,8 @@ def chain_tasks_wf() -> pd.DataFrame:


# %%
# SubWorkflows
# ^^^^^^^^^^^^
# Chain SubWorkflows
# ^^^^^^^^^^^^^^^^^^
#
# Similar to tasks, you can chain :ref:`subworkflows <subworkflows>`.
@workflow
11 changes: 6 additions & 5 deletions cookbook/core/control_flow/dynamics.py
@@ -124,7 +124,8 @@ def count_characters(s1: str, s2: str) -> int:
# To manage this problem, the values need to be passed to the other tasks to unwrap them.
#
# .. note:: The local execution will work when a ``@dynamic`` decorator is used because Flytekit treats it like a ``task`` that will run with the Python native inputs.
# Therefore, there are no Promise objects locally within the function decorated with ``@dynamic`` as it is treated as a ``task``.
#
# Therefore, there are no Promise objects locally within the function decorated with ``@dynamic`` as it is treated as a ``task``\.

# %%
# Finally, we define a workflow that calls the dynamic workflow.
@@ -185,13 +185,13 @@ def wf(s1: str, s2: str) -> int:
#
# A dynamic workflow is modeled as a task in the backend, but the body of the function is executed to produce a workflow at run-time. In both dynamic and static workflows, the output of tasks are Promise objects.
#
# .. note::
# When a dynamic (or static) workflow calls a task, the workflow returns a :py:class:`Promise <flytekit.extend.Promise>` object. You can’t interact with this Promise object directly since it uses lazy evaluation (it defers the evaluation until absolutely needed). You can unwrap the Promise object by passing it to a task or a dynamic workflow.
#
# .. note:: When a dynamic (or static) workflow calls a task, the workflow returns a :ref:`Promise <https://docs.flyte.org/projects/flytekit/en/latest/generated/flytekit.extend.Promise.html#flytekit-extend-promise>` object. You can’t interact with this Promise object directly since it uses lazy evaluation (it defers the evaluation until absolutely needed). You can unwrap the Promise object by passing it to a task or a dynamic workflow.
#
# :ref:`Here<Predicting House Price in Multiple Regions Using XGBoost and Dynamic Workflows>` is an example of house price prediction using dynamic workflows.
# :ref:`Here <Predicting House Price in Multiple Regions Using XGBoost and Dynamic Workflows>` is an example of house price prediction using dynamic workflows.
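# %%
# As a compact illustration of unwrapping (a sketch, separate from the example
# above): a task output behaves like a Promise inside a dynamic workflow, so it
# is resolved by passing it to another task rather than manipulating it directly.
#
# .. code-block:: python
#
#    @task
#    def char_count(s: str) -> int:
#        return len(s)
#
#    @task
#    def add(a: int, b: int) -> int:
#        return a + b
#
#    @dynamic
#    def total_chars(s1: str, s2: str) -> int:
#        # char_count(...) returns a Promise; add() unwraps it
#        return add(a=char_count(s=s1), b=char_count(s=s2))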
#
# Where Are Dynamic Workflows Used?
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
#
# Dynamic workflow comes into the picture when you need to:
#
4 changes: 2 additions & 2 deletions cookbook/core/control_flow/subworkflows.py
@@ -96,7 +96,7 @@ def nested_parent_wf(a: int) -> Tuple[int, str, str, str]:
print(f"Running nested_parent_wf(a=3) {nested_parent_wf(a=3)}")

# %%
# .. note:: You can chain and execute subworkflows similar to chained :ref:`Flyte tasks<Chain Flyte Tasks>`.
# .. note:: You can :ref:`chain and execute subworkflows <Chain SubWorkflows>` similar to chained flyte tasks.


# %%
@@ -112,7 +112,7 @@ def nested_parent_wf(a: int) -> Tuple[int, str, str, str]:
#
# .. tip::
#
# If your deployment uses :ref:`multicluster-setup <Using Multiple Kubernetes Clusters>`, then external workflows may allow you to distribute the workload of a workflow to multiple clusters.
# If your deployment uses :ref:`Using Multiple Kubernetes Clusters <flyte:multicluster-setup>`, then external workflows may allow you to distribute the workload of a workflow to multiple clusters.
#
# Here's an example demonstrating external workflows:

11 changes: 5 additions & 6 deletions cookbook/core/extend_flyte/prebuilt_container.py
@@ -17,8 +17,7 @@
Take a look at the `example PR <https://github.com/flyteorg/flytekit/pull/470>`__, where we switched the built-in SQLite3 task from the old (user-container) to the new style of writing tasks.
There aren't many changes from the user's standpoint:
- Install whichever Python library has the task type definition (in the case of SQLite3, it's bundled in Flytekit, but this isn't always the case
(for example, `SQLAlchemy <https://github.com/flyteorg/flytekit/tree/master/plugins/flytekit-sqlalchemy>`__)).
- Install whichever Python library has the task type definition (in the case of SQLite3, it's bundled in Flytekit, but this isn't always the case (for example, `SQLAlchemy <https://github.com/flyteorg/flytekit/tree/master/plugins/flytekit-sqlalchemy>`__)).
- Import and instantiate the task as you would for any other type of non-function-based task.
How to Write a Task
@@ -51,9 +50,9 @@
Referring to the SQLite3 example, ::
container_image="ghcr.io/flyteorg/flytekit:py38-v0.19.0b7",
executor_type=SQLite3TaskExecutor,
task_type="sqlite",
container_image="ghcr.io/flyteorg/flytekit:py38-v0.19.0b7",
executor_type=SQLite3TaskExecutor,
task_type="sqlite",
Note that the container is special in this case since we utilize the Flytekit image.
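For reference, instantiating the shipped SQLite3 task looks roughly like this (a sketch with a placeholder dataset URI; check the flytekit API reference for the exact signature):

.. code-block:: python

    from flytekit import kwtypes
    from flytekit.extras.sqlite3.task import SQLite3Config, SQLite3Task

    get_tracks = SQLite3Task(
        name="sqlite3.get_tracks",
        query_template="SELECT * FROM tracks LIMIT {{ .inputs.limit }}",
        inputs=kwtypes(limit=int),
        task_config=SQLite3Config(
            uri="https://example.com/chinook.zip",  # placeholder archive containing the .sqlite file
            compressed=True,
        ),
    )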
@@ -78,7 +77,7 @@
pyflyte-execute --inputs s3://inputs.pb --output-prefix s3://outputs --raw-output-data-prefix s3://user-data --resolver flytekit.core.python_customized_container_task.default_task_template_resolver -- {{.taskTemplatePath}} path.to.your.executor.subclass
This means that your `Docker image <https://github.com/flyteorg/flytekit/blob/master/Dockerfile.py3.10>`_ will need Python and Flytekit installed.
This means that your `Docker image <https://github.com/flyteorg/flytekit/blob/master/Dockerfile>`__ will need Python and Flytekit installed.
The container's Python interpreter should be able to find your custom executor class at the import path ``path.to.your.executor.subclass``.
----
3 changes: 2 additions & 1 deletion cookbook/core/flyte_basics/basic_workflow.py
@@ -2,7 +2,8 @@
Workflows
----------
Once you have a handle on :ref:`tasks <basics_of_tasks>`, we can dive into Flyte workflows. Together, Flyte tasks and workflows make up the fundamental building blocks of Flyte.
Once you have a handle on :ref:`tasks <basics_of_tasks>`, you can dive into Flyte workflows.
Together, Flyte tasks and workflows make up the fundamental building blocks of Flyte.
Workflows bind together two or more tasks. They can be written as Python functions, but it is essential to make a
critical distinction between tasks and workflows.
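As a bare-bones illustration (a sketch, not the workflow built in this example), a workflow simply calls tasks and wires their outputs together:

.. code-block:: python

    from flytekit import task, workflow

    @task
    def double(x: int) -> int:
        return x * 2

    @workflow
    def my_wf(x: int = 1) -> int:
        return double(x=double(x=x))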
2 changes: 1 addition & 1 deletion cookbook/core/flyte_basics/decorating_tasks.py
@@ -107,4 +107,4 @@ def wf(x: int) -> int:
# In this example, you learned how to modify the behavior of tasks via function decorators using the built-in
# :py:func:`~functools.wraps` decorator pattern. To learn more about how to extend Flyte at a deeper level, for
# example creating custom types, custom tasks, or backend plugins,
# see :ref:`Extending Flyte <sphx_glr_auto_core_extend_flyte>`.
# see :ref:`Extending Flyte <plugins_extend>`.
2 changes: 1 addition & 1 deletion cookbook/core/flyte_basics/decorating_workflows.py
@@ -146,4 +146,4 @@ def wf(x: float) -> float:
#
# To define workflows imperatively, refer to :ref:`this example <sphx_glr_auto_core_flyte_basics_imperative_wf_style.py>`,
# and to learn more about how to extend Flyte at a deeper level, for example creating custom types, custom tasks, or
# backend plugins, see :ref:`Extending Flyte <sphx_glr_auto_core_extend_flyte>`.
# backend plugins, see :ref:`Extending Flyte <plugins_extend>`.
2 changes: 1 addition & 1 deletion cookbook/core/flyte_basics/files.py
@@ -9,7 +9,7 @@
`blob type <https://github.com/flyteorg/flyteidl/blob/cee566b2e6e109120f1bb34c980b1cfaf006a473/protos/flyteidl/core/types.proto#L47>`__.
Let's assume our mission here is pretty simple. We download a few csv file
links, read them with the python built-in :py:func:`csv.DictReader` function,
links, read them with the python built-in :py:class:`csv.DictReader` function,
normalize some pre-specified columns, and output the normalized columns to
another csv file.
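As a quick reminder of the stdlib piece involved (a sketch with a placeholder column name, unrelated to the Flyte-specific code below), ``csv.DictReader`` yields one dict per row, keyed by the header:

.. code-block:: python

    import csv

    with open("data.csv", newline="") as f:
        rows = list(csv.DictReader(f))          # e.g. [{"age": "42", ...}, ...]
    ages = [float(row["age"]) for row in rows]  # "age" is a placeholder column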
2 changes: 1 addition & 1 deletion cookbook/core/flyte_basics/lp.py
@@ -1,6 +1,6 @@
"""
Launch Plans
-------------
------------
Launch plans bind a partial or complete list of inputs necessary to launch a workflow, along
with optional run-time overrides such as notifications, schedules and more.
3 changes: 2 additions & 1 deletion cookbook/core/flyte_basics/task.py
@@ -1,5 +1,6 @@
# .. _basics_of_tasks:
"""
.. _basics_of_tasks:
Tasks
-----
Empty file.
2 changes: 2 additions & 0 deletions cookbook/core/type_system/custom_objects.py
@@ -1,4 +1,6 @@
"""
.. _dataclass_type:
Using Custom Python Objects
---------------------------
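For orientation (a sketch, not necessarily this example's classes — it assumes the ``dataclasses_json`` pattern flytekit relied on at the time), a custom object is declared as a decorated dataclass and then used directly as a task input or output type:

.. code-block:: python

    from dataclasses import dataclass

    from dataclasses_json import dataclass_json
    from flytekit import task

    @dataclass_json
    @dataclass
    class Point:
        x: float
        y: float

    @task
    def shift(p: Point) -> Point:
        # dataclasses are serialized by flytekit and passed between tasks
        return Point(x=p.x + 1.0, y=p.y)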
4 changes: 2 additions & 2 deletions cookbook/core/type_system/flyte_pickle.py
@@ -24,12 +24,12 @@


# %%
# This People is a user defined complex type, which can be used to pass complex data between tasks.
# ``People`` is a user defined complex type, which can be used to pass complex data between tasks.
# We will serialize this class to a pickle file and pass it between different tasks.
#
# .. Note::
#
# Here we can also `turn this object to dataclass <custom_objects.html>`_ to have better performance.
# Here we can also :ref:`turn this object to dataclass <dataclass_type>` to have better performance.
# We use simple object here for demo purpose.
# You may have some object that can't turn into a dataclass, e.g. NumPy, Tensor.
class People: