diff --git a/.github/workflows/checks.yml b/.github/workflows/checks.yml index 61c7f7fc0c..2a94223f95 100644 --- a/.github/workflows/checks.yml +++ b/.github/workflows/checks.yml @@ -145,3 +145,18 @@ jobs: "tag_name": "'${{ needs.bump-version.outputs.version }}'", "prerelease": false }' + + docs_warnings: + name: Docs Warnings + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + with: + fetch-depth: "0" + - name: Report Sphinx Warnings + id: sphinx-warnings + run: | + sudo apt-get install python3-sphinx + cd cookbook + pip install -r docs-requirements.txt + cd docs && make html \ No newline at end of file diff --git a/cookbook/case_studies/ml_training/mnist_classifier/pytorch_single_node_multi_gpu.py b/cookbook/case_studies/ml_training/mnist_classifier/pytorch_single_node_multi_gpu.py index f05d92ad22..4308292daa 100644 --- a/cookbook/case_studies/ml_training/mnist_classifier/pytorch_single_node_multi_gpu.py +++ b/cookbook/case_studies/ml_training/mnist_classifier/pytorch_single_node_multi_gpu.py @@ -2,8 +2,8 @@ Single Node, Multi GPU Training -------------------------------- -When you need to scale up model training in pytorch, you can use the :py:class:`~pytorch:torch.nn.DataParallel` for -single node, multi-gpu/cpu training or :py:class:`~pytorch:torch.nn.parallel.DistributedDataParallel` for multi-node, +When you need to scale up model training in pytorch, you can use the :py:class:`~torch:torch.nn.DataParallel` for +single node, multi-gpu/cpu training or :py:class:`~torch:torch.nn.parallel.DistributedDataParallel` for multi-node, multi-gpu training. This tutorial will cover how to write a simple training script on the MNIST dataset that uses @@ -37,7 +37,7 @@ # %% # We'll re-use certain classes and functions from the # :ref:`single node and gpu tutorial ` -# such as the ``Net`` model architecture, ``Hyperparameters``, and ``log_test_predictions``. +# such as the ``Net`` model architecture, ``Hyperparameters``, and ``log_test_predictions``\. from mnist_classifier.pytorch_single_node_and_gpu import Hyperparameters, Net, log_test_predictions from torch import distributed as dist from torch import multiprocessing as mp @@ -53,18 +53,18 @@ DATA_DIR = "./data" # %% -# The following variables are specific to ``wandb``: +# The following variables are specific to ``wandb``\: # -# - ``NUM_BATCHES_TO_LOG``: Number of batches to log from the test data for each test step -# - ``LOG_IMAGES_PER_BATCH``: Number of images to log per test batch +# - ``NUM_BATCHES_TO_LOG``\: Number of batches to log from the test data for each test step +# - ``LOG_IMAGES_PER_BATCH``\: Number of images to log per test batch NUM_BATCHES_TO_LOG = 10 LOG_IMAGES_PER_BATCH = 32 # %% -# If running remotely, copy your ``wandb`` API key to the Dockerfile under the environment variable ``WANDB_API_KEY``. +# If running remotely, copy your ``wandb`` API key to the Dockerfile under the environment variable ``WANDB_API_KEY``\. # This function logs into ``wandb`` and initializes the project. If you built your Docker image with the -# ``WANDB_USERNAME``, this will work. Otherwise, replace ``my-user-name`` with your ``wandb`` user name. +# ``WANDB_USERNAME``\, this will work. Otherwise, replace ``my-user-name`` with your ``wandb`` user name. # # We'll call this function in the ``pytorch_mnist_task`` defined below. 
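For reference, a minimal sketch of such a setup function might look like the following (the project name is an illustrative placeholder, and the environment-variable handling assumes the Dockerfile setup described above):

.. code-block:: python

    import os

    import wandb

    def wandb_setup():
        # WANDB_API_KEY is read from the image environment, as described above.
        wandb.login(key=os.environ["WANDB_API_KEY"])
        wandb.init(
            project="mnist-single-node-multi-gpu",  # illustrative project name
            entity=os.environ.get("WANDB_USERNAME", "my-user-name"),
        )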
def wandb_setup(): @@ -178,7 +178,7 @@ def train(model, rank, train_loader, optimizer, epoch, log_interval): # Evaluation # ========== # -# We define a ``test`` function to test the model on the test dataset, logging ``accuracy``, and ``test_loss`` to a +# We define a ``test`` function to test the model on the test dataset, logging ``accuracy``\, and ``test_loss`` to a # ``wandb`` `table `__, which helps us visualize the model's # performance in a structured format. def test(model, rank, test_loader): @@ -271,8 +271,8 @@ def dist_setup(rank, world_size, backend): # %% -# Then we define the ``train_mnist`` function. Note the conditionals that check for ``rank == 0``. These parts of the -# functions are only called in the main process, which is the ``0``th rank. The reason for this is that we only want the +# Then we define the ``train_mnist`` function. Note the conditionals that check for ``rank == 0``\. These parts of the +# functions are only called in the main process, which is the ``0``\th rank. The reason for this is that we only want the # main process to perform certain actions such as: # # - log metrics via ``wandb`` @@ -355,12 +355,12 @@ def train_mnist(rank: int, world_size: int, hp: Hyperparameters): # %% -# The output model using :py:func:`pytorch:torch.save` saves the `state_dict` as described +# The output model using :py:func:`torch:torch.save` saves the `state_dict` as described # `in pytorch docs `_. # A common convention is to have the ``.pt`` extension for the model file. # # .. note:: -# Note the usage of ``requests=Resources(gpu=WORLD_SIZE)``. This will force Flyte to allocate this task onto a +# Note the usage of ``requests=Resources(gpu=WORLD_SIZE)``\. This will force Flyte to allocate this task onto a # machine with GPU(s), which in our case is 2 gpus. The task will be queued up until a machine with GPU(s) can be # procured. Also, for the GPU Training to work, the Dockerfile needs to be built as explained in the # :ref:`pytorch-dockerfile` section. @@ -370,11 +370,11 @@ def train_mnist(rank: int, world_size: int, hp: Hyperparameters): # ===================== # # Next we define the flyte task that kicks off the distributed training process. Here we call the -# pytorch :py:func:`multiprocessing ` function to initiate a process on each +# pytorch :py:func:`multiprocessing ` function to initiate a process on each # available GPU. Since we're parallelizing the data, each process will contain a copy of the model and pytorch # will handle syncing the weights across all processes on ``optimizer.step()`` calls. # -# Read more about pytorch distributed training `here `_. +# Read more about pytorch distributed training `here `__. # %% diff --git a/cookbook/case_studies/ml_training/nlp_processing/word2vec_and_lda.py b/cookbook/case_studies/ml_training/nlp_processing/word2vec_and_lda.py index 4004a1324a..d625f86ad9 100644 --- a/cookbook/case_studies/ml_training/nlp_processing/word2vec_and_lda.py +++ b/cookbook/case_studies/ml_training/nlp_processing/word2vec_and_lda.py @@ -57,7 +57,7 @@ # %% -# We declare ``NamedTuple``s which will be used as signatures of the Flyte task outputs. +# We declare ``NamedTuple``\s which will be used as signatures of the Flyte task outputs. # The variable names and types correspond to the values of the unpacked tuples returned # from the corresponding Flyte task. 
plotdata = typing.NamedTuple( diff --git a/cookbook/case_studies/ml_training/spark_horovod/README.rst b/cookbook/case_studies/ml_training/spark_horovod/README.rst index be2d6ddd20..db3434d007 100644 --- a/cookbook/case_studies/ml_training/spark_horovod/README.rst +++ b/cookbook/case_studies/ml_training/spark_horovod/README.rst @@ -5,7 +5,7 @@ Forecasting Rossman Store Sales with Horovod and Spark The problem statement we will be looking at is forecasting sales using `rossmann store sales `__ data. Our example is an adaptation of the `Horovod-Spark example `__. -Here’s how the code is streamlined: +Here's how the code is streamlined: - Fetch the rossmann sales data - Perform complicated data pre-processing using Flyte-Spark plugin @@ -17,7 +17,7 @@ About Horovod Horovod is a distributed deep learning training framework for TensorFlow, Keras, PyTorch, and Apache MXNet. The goal of Horovod is to make distributed deep learning fast and easy to use. -It uses the all-reduce algorithm for fast distributed training rather than a parameter server approach (`all-reduce vs. parameter server `__). +It uses the all-reduce algorithm for fast distributed training instead of a parameter server approach. It builds on top of low-level frameworks like MPI and NCCL and provides optimized algorithms for sharing data between parallel training processes. .. figure:: https://raw.githubusercontent.com/flyteorg/static-resources/main/flytesnacks/tutorials/horovod/all_reduce.png @@ -30,7 +30,7 @@ About Spark Spark is a data processing and analytics engine to deal with large-scale data. -Here’s an interesting fact about Spark integration: +Here's an interesting fact about Spark integration: **Spark integrates with both Horovod and Flyte** @@ -42,7 +42,7 @@ It facilitates running distributed jobs on the Spark cluster. In our example, we use an Estimator API. An estimator API abstracts the data processing, model training and checkpointing, and distributed training, which makes it easy to integrate and run our example code. -Since we use the Keras deep learning library, here’s how we install the relevant Horovod packages: +Since we use the Keras deep learning library, here's how we install the relevant Horovod packages: .. code-block:: bash @@ -56,7 +56,7 @@ The installation includes enabling MPI and TensorFlow environments. Flyte and Spark ^^^^^^^^^^^^^^^ -Flyte can execute Spark jobs natively on a Kubernetes Cluster, which manages a virtual cluster’s lifecycle, spin-up, and tear down. +Flyte can execute Spark jobs natively on a Kubernetes Cluster, which manages a virtual cluster's lifecycle, spin-up, and tear down. It leverages the open-sourced Spark On K8s Operator and can be enabled without signing up for any service. This is like running a transient spark cluster—a type of cluster spun up for a specific Spark job and torn down after completion. @@ -70,8 +70,11 @@ To install the Spark plugin on Flyte, we use the following command: :alt: Flyte-Spark plugin Flyte-Spark plugin + The plugin requires configuring the Flyte backend as well. Refer to :ref:`Kubernetes Spark Jobs ` for setup instructions. -In a nutshell, here’s how Horovod-Spark-Flyte can be beneficial: + +In a nutshell, here's how Horovod-Spark-Flyte can be beneficial: + Horovod provides the distributed framework, Spark enables extracting, preprocessing, and partitioning data, Flyte can stitch the former two pieces together, e.g., by connecting the data output of a Spark transform to a training system using Horovod while ensuring high utilization of GPUs! 
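To make the Flyte side of this concrete, here is a rough sketch of how a Spark-backed Flyte task is declared with the plugin (the Spark properties and task body are illustrative placeholders, not the Rossmann pipeline itself):

.. code-block:: python

    import flytekit
    from flytekit import task
    from flytekitplugins.spark import Spark

    @task(
        task_config=Spark(
            # Placeholder Spark properties; tune these for your cluster.
            spark_conf={
                "spark.driver.memory": "1g",
                "spark.executor.instances": "2",
            }
        )
    )
    def count_rows() -> int:
        # The plugin injects a Spark session into the task context.
        sess = flytekit.current_context().spark_session
        return sess.sparkContext.parallelize(range(100)).count()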
@@ -79,7 +82,7 @@ Run workflows in this directory with the custom-built base image like so: .. prompt:: bash $ - pyflyte run --remote keras_spark_rossmann_estimator.py:horovod_spark_wf --image ghcr.io/flyteorg/flytecookbook:spark_horovod-latest + pyflyte run --remote keras_spark_rossmann_estimator.py:horovod_spark_wf --image ghcr.io/flyteorg/flytecookbook:spark_horovod-latest .. panels:: :header: text-center diff --git a/cookbook/core/containerization/multi_images.py b/cookbook/core/containerization/multi_images.py index 057cc04472..0a66ce2ac7 100644 --- a/cookbook/core/containerization/multi_images.py +++ b/cookbook/core/containerization/multi_images.py @@ -23,14 +23,14 @@ 1. Create an account with `Dockerhub `__. 2. Build a Docker image using the Dockerfile: -.. code-block:: + .. code-block:: - docker build . -f .// -t /: + docker build . -f .// -t /: 3. Once the Docker image is built, login to your Dockerhub account from the CLI: -.. code-block:: + .. code-block:: - docker login + docker login 4. It prompts you to enter the username and the password. 5. Push the Docker image to Dockerhub: @@ -42,13 +42,13 @@ .. code-block:: - docker build -f ./path-to-dockerfile/Dockerfile.prediction -t username/multi-images-prediction:latest - docker login - docker push dockerhub_name/multi-images-prediction + docker build -f ./path-to-dockerfile/Dockerfile.prediction -t username/multi-images-prediction:latest + docker login + docker push dockerhub_name/multi-images-prediction .. tip:: - Sometimes, ``docker login`` may not be successful. In such cases, execute ``docker logout`` and ``docker login``. + Sometimes, ``docker login`` may not be successful. In such cases, execute ``docker logout`` and ``docker login``. Let's dive into the example. """ diff --git a/cookbook/core/control_flow/chain_entities.py b/cookbook/core/control_flow/chain_entities.py index 920b729d09..4a7f026ae7 100644 --- a/cookbook/core/control_flow/chain_entities.py +++ b/cookbook/core/control_flow/chain_entities.py @@ -95,8 +95,8 @@ def chain_tasks_wf() -> pd.DataFrame: # %% -# SubWorkflows -# ^^^^^^^^^^^^ +# Chain SubWorkflows +# ^^^^^^^^^^^^^^^^^^ # # Similar to tasks, you can chain :ref:`subworkflows `. @workflow diff --git a/cookbook/core/control_flow/dynamics.py b/cookbook/core/control_flow/dynamics.py index 525afc9056..33f3512ce8 100644 --- a/cookbook/core/control_flow/dynamics.py +++ b/cookbook/core/control_flow/dynamics.py @@ -124,7 +124,8 @@ def count_characters(s1: str, s2: str) -> int: # To manage this problem, the values need to be passed to the other tasks to unwrap them. # # .. note:: The local execution will work when a ``@dynamic`` decorator is used because Flytekit treats it like a ``task`` that will run with the Python native inputs. -# Therefore, there are no Promise objects locally within the function decorated with ``@dynamic`` as it is treated as a ``task``. +# +# Therefore, there are no Promise objects locally within the function decorated with ``@dynamic`` as it is treated as a ``task``\. # %% # Finally, we define a workflow that calls the dynamic workflow. @@ -185,13 +186,13 @@ def wf(s1: str, s2: str) -> int: # # A dynamic workflow is modeled as a task in the backend, but the body of the function is executed to produce a workflow at run-time. In both dynamic and static workflows, the output of tasks are Promise objects. # +# .. note:: +# When a dynamic (or static) workflow calls a task, the workflow returns a :py:class:`Promise ` object. 
You can’t interact with this Promise object directly since it uses lazy evaluation (it defers the evaluation until absolutely needed). You can unwrap the Promise object by passing it to a task or a dynamic workflow. # -# .. note:: When a dynamic (or static) workflow calls a task, the workflow returns a :ref:`Promise ` object. You can’t interact with this Promise object directly since it uses lazy evaluation (it defers the evaluation until absolutely needed). You can unwrap the Promise object by passing it to a task or a dynamic workflow. -# -# :ref:`Here` is an example of house price prediction using dynamic workflows. +# :ref:`Here ` is an example of house price prediction using dynamic workflows. # # Where Are Dynamic Workflows Used? -# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ # # Dynamic workflow comes into the picture when you need to: # diff --git a/cookbook/core/control_flow/subworkflows.py b/cookbook/core/control_flow/subworkflows.py index fba59c526d..967e87c691 100644 --- a/cookbook/core/control_flow/subworkflows.py +++ b/cookbook/core/control_flow/subworkflows.py @@ -96,7 +96,7 @@ def nested_parent_wf(a: int) -> Tuple[int, str, str, str]: print(f"Running nested_parent_wf(a=3) {nested_parent_wf(a=3)}") # %% -# .. note:: You can chain and execute subworkflows similar to chained :ref:`Flyte tasks`. +# .. note:: You can :ref:`chain and execute subworkflows ` similar to chained flyte tasks. # %% @@ -112,7 +112,7 @@ def nested_parent_wf(a: int) -> Tuple[int, str, str, str]: # # .. tip:: # -# If your deployment uses :ref:`multicluster-setup `, then external workflows may allow you to distribute the workload of a workflow to multiple clusters. +# If your deployment uses :ref:`Using Multiple Kubernetes Clusters `, then external workflows may allow you to distribute the workload of a workflow to multiple clusters. # # Here's an example demonstrating external workflows: diff --git a/cookbook/core/extend_flyte/prebuilt_container.py b/cookbook/core/extend_flyte/prebuilt_container.py index acb72f6cff..8cabc00666 100644 --- a/cookbook/core/extend_flyte/prebuilt_container.py +++ b/cookbook/core/extend_flyte/prebuilt_container.py @@ -17,8 +17,7 @@ Take a look at the `example PR `__, where we switched the built-in SQLite3 task from the old (user-container) to the new style of writing tasks. There aren't many changes from the user's standpoint: -- Install whichever Python library has the task type definition (in the case of SQLite3, it's bundled in Flytekit, but this isn't always the case - (for example, `SQLAlchemy `__)). +- Install whichever Python library has the task type definition (in the case of SQLite3, it's bundled in Flytekit, but this isn't always the case (for example, `SQLAlchemy `__)). - Import and instantiate the task as you would for any other type of non-function-based task. How to Write a Task @@ -51,9 +50,9 @@ Referring to the SQLite3 example, :: - container_image="ghcr.io/flyteorg/flytekit:py38-v0.19.0b7", - executor_type=SQLite3TaskExecutor, - task_type="sqlite", + container_image="ghcr.io/flyteorg/flytekit:py38-v0.19.0b7", + executor_type=SQLite3TaskExecutor, + task_type="sqlite", Note that the container is special in this case since we utilize the Flytekit image. 
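As a point of reference, instantiating the SQLite3 task looks roughly like this (the query, inputs, and database URI below are illustrative placeholders):

.. code-block:: python

    from flytekit import kwtypes
    from flytekit.extras.sqlite3.task import SQLite3Config, SQLite3Task

    sqlite_task = SQLite3Task(
        name="my_sqlite3_query",  # illustrative name
        query_template="SELECT * FROM tracks LIMIT {{ .inputs.limit }}",
        inputs=kwtypes(limit=int),
        task_config=SQLite3Config(
            uri="https://example.com/path/to/database.zip",  # placeholder URI
            compressed=True,
        ),
    )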
@@ -78,7 +77,7 @@ pyflyte-execute --inputs s3://inputs.pb --output-prefix s3://outputs --raw-output-data-prefix s3://user-data --resolver flytekit.core.python_customized_container_task.default_task_template_resolver -- {{.taskTemplatePath}} path.to.your.executor.subclass -This means that your `Docker image `_ will need Python and Flytekit installed. +This means that your `Docker image `__ will need Python and Flytekit installed. The container's Python interpreter should be able to find your custom executor class at the import path ``path.to.your.executor.subclass``. ---- diff --git a/cookbook/core/flyte_basics/basic_workflow.py b/cookbook/core/flyte_basics/basic_workflow.py index 587a4df827..9fdc8de514 100644 --- a/cookbook/core/flyte_basics/basic_workflow.py +++ b/cookbook/core/flyte_basics/basic_workflow.py @@ -2,7 +2,8 @@ Workflows ---------- -Once you have a handle on :ref:`tasks `, we can dive into Flyte workflows. Together, Flyte tasks and workflows make up the fundamental building blocks of Flyte. +Once you have a handle on :ref:`tasks `, you can dive into Flyte workflows. +Together, Flyte tasks and workflows make up the fundamental building blocks of Flyte. Workflows bind together two or more tasks. They can be written as Python functions, but it is essential to make a critical distinction between tasks and workflows. diff --git a/cookbook/core/flyte_basics/decorating_tasks.py b/cookbook/core/flyte_basics/decorating_tasks.py index 79f919fc09..f5563a3592 100644 --- a/cookbook/core/flyte_basics/decorating_tasks.py +++ b/cookbook/core/flyte_basics/decorating_tasks.py @@ -107,4 +107,4 @@ def wf(x: int) -> int: # In this example, you learned how to modify the behavior of tasks via function decorators using the built-in # :py:func:`~functools.wraps` decorator pattern. To learn more about how to extend Flyte at a deeper level, for # example creating custom types, custom tasks, or backend plugins, -# see :ref:`Extending Flyte `. +# see :ref:`Extending Flyte `. diff --git a/cookbook/core/flyte_basics/decorating_workflows.py b/cookbook/core/flyte_basics/decorating_workflows.py index 4970360df9..5979604a4b 100644 --- a/cookbook/core/flyte_basics/decorating_workflows.py +++ b/cookbook/core/flyte_basics/decorating_workflows.py @@ -146,4 +146,4 @@ def wf(x: float) -> float: # # To define workflows imperatively, refer to :ref:`this example `, # and to learn more about how to extend Flyte at a deeper level, for example creating custom types, custom tasks, or -# backend plugins, see :ref:`Extending Flyte `. +# backend plugins, see :ref:`Extending Flyte `. diff --git a/cookbook/core/flyte_basics/files.py b/cookbook/core/flyte_basics/files.py index 3079452ffd..9de5865f8e 100644 --- a/cookbook/core/flyte_basics/files.py +++ b/cookbook/core/flyte_basics/files.py @@ -9,7 +9,7 @@ `blob type `__. Let's assume our mission here is pretty simple. We download a few csv file -links, read them with the python built-in :py:func:`csv.DictReader` function, +links, read them with the python built-in :py:class:`csv.DictReader` function, normalize some pre-specified columns, and output the normalized columns to another csv file. 
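In plain Python terms, the read-and-normalize step boils down to something like the sketch below (min-max scaling is assumed here just for illustration; the cookbook example defines its own normalization):

.. code-block:: python

    import csv
    from typing import List

    def normalize_columns(in_path: str, out_path: str, columns: List[str]) -> None:
        # Read all rows with the built-in DictReader.
        with open(in_path, newline="") as f:
            rows = list(csv.DictReader(f))

        # Min-max scale each requested column to the [0, 1] range.
        for col in columns:
            values = [float(row[col]) for row in rows]
            lo, hi = min(values), max(values)
            for row, value in zip(rows, values):
                row[col] = (value - lo) / (hi - lo) if hi > lo else 0.0

        # Write only the normalized columns to the output csv.
        with open(out_path, "w", newline="") as f:
            writer = csv.DictWriter(f, fieldnames=columns)
            writer.writeheader()
            writer.writerows({col: row[col] for col in columns} for row in rows)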
diff --git a/cookbook/core/flyte_basics/lp.py b/cookbook/core/flyte_basics/lp.py index 0d30b2a01b..1920768d93 100644 --- a/cookbook/core/flyte_basics/lp.py +++ b/cookbook/core/flyte_basics/lp.py @@ -1,6 +1,6 @@ """ Launch Plans -------------- +------------ Launch plans bind a partial or complete list of inputs necessary to launch a workflow, along with optional run-time overrides such as notifications, schedules and more. diff --git a/cookbook/core/flyte_basics/task.py b/cookbook/core/flyte_basics/task.py index d50a622754..a0447c0ad4 100644 --- a/cookbook/core/flyte_basics/task.py +++ b/cookbook/core/flyte_basics/task.py @@ -1,5 +1,6 @@ -# .. _basics_of_tasks: """ +.. _basics_of_tasks: + Tasks ----- diff --git a/cookbook/core/remote_flyte/__init__.py b/cookbook/core/remote_flyte/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/cookbook/core/type_system/custom_objects.py b/cookbook/core/type_system/custom_objects.py index 78d23b342e..9aa766136d 100644 --- a/cookbook/core/type_system/custom_objects.py +++ b/cookbook/core/type_system/custom_objects.py @@ -1,4 +1,6 @@ """ +.. _dataclass_type: + Using Custom Python Objects --------------------------- diff --git a/cookbook/core/type_system/flyte_pickle.py b/cookbook/core/type_system/flyte_pickle.py index 00a7033706..ed5f3d93ac 100644 --- a/cookbook/core/type_system/flyte_pickle.py +++ b/cookbook/core/type_system/flyte_pickle.py @@ -24,12 +24,12 @@ # %% -# This People is a user defined complex type, which can be used to pass complex data between tasks. +# ``People`` is a user defined complex type, which can be used to pass complex data between tasks. # We will serialize this class to a pickle file and pass it between different tasks. # # .. Note:: # -# Here we can also `turn this object to dataclass `_ to have better performance. +# Here we can also :ref:`turn this object to dataclass ` to have better performance. # We use simple object here for demo purpose. # You may have some object that can't turn into a dataclass, e.g. NumPy, Tensor. class People: diff --git a/cookbook/core/type_system/flyte_python_types.py b/cookbook/core/type_system/flyte_python_types.py index 8148026461..9346a7dc39 100644 --- a/cookbook/core/type_system/flyte_python_types.py +++ b/cookbook/core/type_system/flyte_python_types.py @@ -88,6 +88,6 @@ * - User defined types - Any - Custom Transformers - - Use python 3 type hints. We use `FlytePickle transformer `_ by default, but users still can provide custom transformers. Refer to :ref:`advanced_custom_types`. + - Use python 3 type hints. We use ``FlytePickle transformer`` by default, but users still can provide custom transformers. Refer to :ref:`advanced_custom_types`. """ diff --git a/cookbook/core/type_system/structured_dataset.py b/cookbook/core/type_system/structured_dataset.py index 76c2c715d7..7b70bcca2b 100644 --- a/cookbook/core/type_system/structured_dataset.py +++ b/cookbook/core/type_system/structured_dataset.py @@ -50,7 +50,7 @@ # %% -# We define two tasks, one returns a pandas DataFrame and the other a :py:class:`flytekit.types.schema.FlyteSchema`. +# We define two tasks, one returns a pandas DataFrame and the other a ``FlyteSchema``. # Flyte serializes the DataFrames to an intermediate format, a parquet file, before sending them to the other tasks. 
@task def get_df(a: int) -> Annotated[pd.DataFrame, superset_cols]: diff --git a/cookbook/core/type_system/typed_schema.py b/cookbook/core/type_system/typed_schema.py index caed2b8c61..a00cc01c4e 100644 --- a/cookbook/core/type_system/typed_schema.py +++ b/cookbook/core/type_system/typed_schema.py @@ -21,7 +21,7 @@ # %% -# To write to a schema object refer to :py:meth:`flytekit.types.schema.FlyteSchema.open` method. Writing can be done +# To write to a schema object refer to ``FlyteSchema.open`` method. Writing can be done # using any of the supported dataframe formats. # # .. todo:: @@ -36,7 +36,7 @@ def t1() -> out_schema: # %% -# To read a Schema, one has to invoke the :py:meth:`flytekit.types.schema.FlyteSchema.open`. The default mode +# To read a Schema, one has to invoke the ``FlyteSchema.open``. The default mode # is automatically configured to be `open` and the default returned dataframe type is :py:class:`pandas.DataFrame` # Different types of dataframes can be returned based on the type passed into the open method @task diff --git a/cookbook/deployment/deploying_workflows.py b/cookbook/deployment/deploying_workflows.py index 9cdd6fa8f2..eae81c6e14 100644 --- a/cookbook/deployment/deploying_workflows.py +++ b/cookbook/deployment/deploying_workflows.py @@ -1,6 +1,6 @@ """ Deploying Workflows - Registration ------------------------------------ +---------------------------------- Locally, Flytekit relies on the Python interpreter to execute tasks and workflows. To leverage the full power of Flyte, we recommend using a deployed backend of Flyte. Flyte can be run @@ -12,7 +12,7 @@ 2. Providing packaging in the form of Docker images for code when needed. In some cases packaging isn't needed, because the code itself is portable- for example SQL, or the task references a remote service - SageMaker Builtin algorithms, or the code can be safely transferred over; -3. Alternatively, packaging with :ref:`deployment-fast-registration`; +3. Alternatively, packaging with :ref:`Fast Registration`; 4. Registering the serialized workflows and tasks. Using remote Flyte provides: @@ -27,7 +27,7 @@ ^^^^^^^^^^^^^^^^^^^^^^ 1. Commit your changes. Some of the steps below default to referencing the git sha. -2. Run ``pyflyte register``. This :doc:`command ` compiles all Flyte entities and sends it to the backend as specified by your config file. +2. Run ``pyflyte register``. This command compiles all Flyte entities and sends it to the backend as specified by your config file. 3. Build a container image that holds your code. .. code-block:: docker @@ -132,9 +132,8 @@ - :code:`--version` is a unique string used to identify the version of your entities to be registered under a project and domain. - If required, you can specify a :code:`--k8sServiceAccount` and :code:`--assumableIamRole` which your tasks will run with. - Fast Registration -^^^^^^^^^^^^^^^^^^ +^^^^^^^^^^^^^^^^^ Re-building a new Docker container image for every code change you make is cumbersome and slow. If you're making purely code changes that **do not** require updating your container definition, you can make use of fast serialization and registration to speed up your iteration process and reduce the time it takes to upload new entity diff --git a/cookbook/docs/Makefile b/cookbook/docs/Makefile index 55e0638866..d57719612a 100644 --- a/cookbook/docs/Makefile +++ b/cookbook/docs/Makefile @@ -4,7 +4,7 @@ rsts: .PHONY: html html: - sphinx-build -n -b html . _build + sphinx-build -n -W -b html . 
_build .PHONY: linkcheck linkcheck: diff --git a/cookbook/docs/batch_prediction.rst b/cookbook/docs/batch_prediction.rst deleted file mode 100644 index dc4177fe9c..0000000000 --- a/cookbook/docs/batch_prediction.rst +++ /dev/null @@ -1,9 +0,0 @@ -:nosearch: - -################# -Batch Predictions -################# - -.. NOTE:: - - Coming soon 🛠 diff --git a/cookbook/docs/bioinformatics.rst b/cookbook/docs/bioinformatics_examples.rst similarity index 94% rename from cookbook/docs/bioinformatics.rst rename to cookbook/docs/bioinformatics_examples.rst index a34e8d8f48..7b199eadb0 100644 --- a/cookbook/docs/bioinformatics.rst +++ b/cookbook/docs/bioinformatics_examples.rst @@ -1,6 +1,6 @@ -################### +############## Bioinformatics -################### +############## **Bioinformatics** is defined as the application of tools of computation and analysis to capture and interpret biological data. diff --git a/cookbook/docs/conf.py b/cookbook/docs/conf.py index 427d6cbba9..577259e47c 100644 --- a/cookbook/docs/conf.py +++ b/cookbook/docs/conf.py @@ -207,6 +207,8 @@ def __call__(self, filename): html_static_path = ["_static"] html_css_files = ["sphx_gallery_autogen.css", "custom.css"] +suppress_warnings = ["autosectionlabel.*"] + # generate autosummary even if no references autosummary_generate = True diff --git a/cookbook/docs/contribute.rst b/cookbook/docs/contribute.rst index db60a0cbae..21696cb940 100644 --- a/cookbook/docs/contribute.rst +++ b/cookbook/docs/contribute.rst @@ -142,12 +142,13 @@ Build Docker container: Package the examples by running .. prompt:: bash - + pyflyte --pkgs core package --image core:v1 -f Register the examples by running .. prompt:: bash + flytectl register files --archive -p flytesnacks -d development --archive flyte-package.tgz --version v1 Visit ``https://localhost:30081/console`` to view the Flyte console, which consists of the examples present in the diff --git a/cookbook/docs/data_processing.rst b/cookbook/docs/data_processing.rst deleted file mode 100644 index 0b0321d5d0..0000000000 --- a/cookbook/docs/data_processing.rst +++ /dev/null @@ -1,9 +0,0 @@ -:nosearch: - -############### -Data Processing -############### - -.. NOTE:: - - Coming soon 🛠 diff --git a/cookbook/docs/feature_engineering.rst b/cookbook/docs/feature_engineering.rst index c2e20fe471..f410cb9dbd 100644 --- a/cookbook/docs/feature_engineering.rst +++ b/cookbook/docs/feature_engineering.rst @@ -1,5 +1,3 @@ -:nosearch: - ################### Feature Engineering ################### diff --git a/cookbook/docs/flytelab.rst b/cookbook/docs/flyte_lab.rst similarity index 100% rename from cookbook/docs/flytelab.rst rename to cookbook/docs/flyte_lab.rst diff --git a/cookbook/docs/index.rst b/cookbook/docs/index.rst index 9ac0de7bc9..478a31cbcd 100644 --- a/cookbook/docs/index.rst +++ b/cookbook/docs/index.rst @@ -130,7 +130,7 @@ Table of Contents :hidden: |plane| Getting Started - |book-reader| User Guide + |book-reader| User Guide |chalkboard| Tutorials |project-diagram| Concepts |rocket| Deployment @@ -164,8 +164,8 @@ Table of Contents Tutorials ml_training feature_engineering - bioinformatics - flytelab + bioinformatics_examples + flyte_lab .. toctree:: :maxdepth: -1 diff --git a/cookbook/docs/integrations.rst b/cookbook/docs/integrations.rst index 1aa36e6669..97a7aa9f01 100644 --- a/cookbook/docs/integrations.rst +++ b/cookbook/docs/integrations.rst @@ -251,12 +251,10 @@ the Flyte task that use the respective plugin. .. 
rli:: https://raw.githubusercontent.com/flyteorg/flyte/master/kustomize/overlays/sandbox/flyte/config/propeller/enabled_plugins.yaml :language: yaml - Finding the ``ID`` of the Backend Plugin - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + **Finding the ``ID`` of the Backend Plugin** This is a little tricky since you have to look at the source code of the plugin to figure out the ``ID``. In the case of Spark, for example, the value of ``ID`` is `used `_ here, defined as `spark `_. - Enabling a Specific Backend Plugin in Your Own Kustomize Generator - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + **Enabling a Specific Backend Plugin in Your Own Kustomize Generator** Flyte uses Kustomize to generate the the deployment configuration which can be leveraged to `kustomize your own deployment `_. ********************** diff --git a/cookbook/docs/ml_monitoring.rst b/cookbook/docs/ml_monitoring.rst deleted file mode 100644 index 4910ae6a1d..0000000000 --- a/cookbook/docs/ml_monitoring.rst +++ /dev/null @@ -1,9 +0,0 @@ -:nosearch: - -################ -ML Monitoring -################ - -.. NOTE:: - - Coming soon 🛠 diff --git a/cookbook/docs/ml_training.rst b/cookbook/docs/ml_training.rst index 3027b82a48..b95e122c66 100644 --- a/cookbook/docs/ml_training.rst +++ b/cookbook/docs/ml_training.rst @@ -1,6 +1,6 @@ -################ +########### ML Training -################ +########### Understand how machine learning models can be trained from within Flyte, with an added advantage of orchestration benefits. diff --git a/cookbook/docs/tutorials.rst b/cookbook/docs/tutorials.rst index 5580058a11..f695d0ae76 100644 --- a/cookbook/docs/tutorials.rst +++ b/cookbook/docs/tutorials.rst @@ -6,11 +6,11 @@ Tutorials This section showcases step-by-step case studies of how to combine the different features of Flyte to achieve everything from data processing, feature engineering, model training, to batch predictions. Code for all of the examples in the -user guide can be found in the `flytesnacks repo `_. +user guide can be found in the `flytesnacks repo `__. `Flytesnacks `_ comes with a highly customized environment to make running, documenting and contributing samples easy. If this is your first time running these examples, follow the -:ref:`setup guide ` to get started. +:ref:`setup guide ` to get started. .. panels:: :header: text-center @@ -34,7 +34,7 @@ documenting and contributing samples easy. If this is your first time running th --- - .. link-button:: bioinformatics + .. link-button:: bioinformatics_examples :type: ref :text: 🧪 Bioinformatics :classes: btn-block stretched-link @@ -43,7 +43,7 @@ documenting and contributing samples easy. If this is your first time running th --- - .. link-button:: flytelab + .. link-button:: flyte_lab :type: ref :text: 🔬 Flytelab :classes: btn-block stretched-link diff --git a/cookbook/docs/userguide_setup.rst b/cookbook/docs/userguide_setup.rst index 75c2c4805b..fe27458894 100644 --- a/cookbook/docs/userguide_setup.rst +++ b/cookbook/docs/userguide_setup.rst @@ -123,4 +123,4 @@ Replace ```` with version from console UI, it may look something like ` What's Next? ^^^^^^^^^^^^ -Try out the examples in :doc:`Flyte Basics ` section. +Try out the examples in :doc:`Flyte Basics ` section. 
diff --git a/cookbook/integrations/aws/athena/README.rst b/cookbook/integrations/aws/athena/README.rst index 62fd517083..7b6f6827ca 100644 --- a/cookbook/integrations/aws/athena/README.rst +++ b/cookbook/integrations/aws/athena/README.rst @@ -3,24 +3,17 @@ AWS Athena ########## Executing Athena Queries -======================= -Flyte backend can be connected with athena. Once enabled, it allows you to query AWS Athena service (Presto + ANSI SQL Support) and retrieve typed schema (optionally). - -This section provides a guide on how to use the AWS Athena Plugin using flytekit python. +======================== +Flyte backend can be connected with Athena. Once enabled, it allows you to query AWS Athena service (Presto + ANSI SQL Support) and retrieve typed schema (optionally). +This plugin is purely a spec and since SQL is completely portable, it has no need to build a container. Thus this plugin example does not have any Dockerfile. Installation ------------ -To use the flytekit athena plugin simply run the following: +To use the flytekit Athena plugin, simply run the following: .. prompt:: bash pip install flytekitplugins-athena -No Need for a dockerfile ------------------------- -This plugin is purely a spec and since SQL is completely portable, it has no need to build a container. Thus this plugin examples do not have any Dockerfiles. - -.. TODO: write a subsection for "Configuring the backend to get athena working" -======= - Coming soon 🛠 +Now let's dive into the code. diff --git a/cookbook/integrations/aws/batch/batch.py b/cookbook/integrations/aws/batch/batch.py index f65406b9ed..b993451d84 100644 --- a/cookbook/integrations/aws/batch/batch.py +++ b/cookbook/integrations/aws/batch/batch.py @@ -3,7 +3,7 @@ ############ This example shows how to use a Flyte AWS batch plugin to execute a tasks on batch service. With AWS Batch, there is no need to install and manage batch computing software or server clusters - that you use to run your jobs, allowing you to focus on analyzing results and solving problems. +that you use to run your jobs, allowing you to focus on analyzing results and solving problems. """ from flytekit import task, workflow diff --git a/cookbook/integrations/aws/sagemaker_training/sagemaker_builtin_algo_training.py b/cookbook/integrations/aws/sagemaker_training/sagemaker_builtin_algo_training.py index bd672ee62a..2998733146 100644 --- a/cookbook/integrations/aws/sagemaker_training/sagemaker_builtin_algo_training.py +++ b/cookbook/integrations/aws/sagemaker_training/sagemaker_builtin_algo_training.py @@ -89,7 +89,7 @@ # %% -# :ref:`single_task_execution` can be used to execute just the task without needing to create a workflow. +# :ref:`Single task execution ` can be used to execute just the task without needing to create a workflow. 
# To trigger an execution, you will need to provide: # # Project (flyteexamples): the project under which the execution will be created diff --git a/cookbook/integrations/flytekit_plugins/greatexpectations/README.rst b/cookbook/integrations/flytekit_plugins/greatexpectations/README.rst index 4df1fa7c66..51e95ff0bc 100644 --- a/cookbook/integrations/flytekit_plugins/greatexpectations/README.rst +++ b/cookbook/integrations/flytekit_plugins/greatexpectations/README.rst @@ -13,14 +13,14 @@ ensure that everything is as you expected and hence, data will not crash your pi How to Define the Integration ----------------------------- -Great Expectations supports native execution of expectations against various `Datasources `__, +Great Expectations supports native execution of expectations against various `Datasources `__, such as Pandas dataframes, Spark dataframes, and SQL databases via SQLAlchemy. We're supporting two Flyte types that should suit Great Expectations' ``Datasources``: - :py:class:`flytekit.types.file.FlyteFile`: ``FlyteFile`` represents an automatic persistence object in Flyte. It can represent files in remote storage and Flyte transparently materializes them in every task execution. -- :py:class:`flytekit.types.schema.FlyteSchema`: ``FlyteSchema`` supports tabular data, which the plugin will convert into a parquet file and validate the data using Great Expectations. +- :py:class:`flytekit.types.structured.StructuredDataset`: ``StructuredDataset`` supports pandas dataframes, which the plugin will convert into a parquet file and validate the data using Great Expectations. .. note:: Flyte types are added because, in Great Expectations, we have the privilege to give a non-string (Pandas/Spark DataFrame) when using a diff --git a/cookbook/integrations/flytekit_plugins/sql/sql_alchemy.py b/cookbook/integrations/flytekit_plugins/sql/sql_alchemy.py index 9cab370755..32d7371316 100644 --- a/cookbook/integrations/flytekit_plugins/sql/sql_alchemy.py +++ b/cookbook/integrations/flytekit_plugins/sql/sql_alchemy.py @@ -30,7 +30,7 @@ # # .. note:: # -# The output of SQLAlchemyTask is a :py:class:`~flytekit.types.schema.FlyteSchema` by default. +# The output of SQLAlchemyTask is a ``FlyteSchema`` by default. # # .. caution:: # @@ -67,7 +67,7 @@ # # .. code:: # -# pyflyte --config ~/.flyte/your-config.yaml run --destination-dir /app --remote --image ghcr.io/flyteorg/flytekit:py3.8-sqlalchemy-latest integrations/flytekit_plugins/sql/sql_alchemy.py my_wf --min_length 3 --max_length 100 --limit 50 +# pyflyte --config ~/.flyte/your-config.yaml run --destination-dir /app --remote --image ghcr.io/flyteorg/flytekit:py3.8-sqlalchemy-latest integrations/flytekit_plugins/sql/sql_alchemy.py my_wf --min_length 3 --max_length 100 --limit 50 # # Note also we added the ``destination-dir`` argument, since by default ``pyflyte run`` copies code into ``/root`` which # is not what that image's workdir is set to. diff --git a/cookbook/integrations/gcp/bigquery/README.rst b/cookbook/integrations/gcp/bigquery/README.rst index 25da2b6ff0..670d8e181a 100644 --- a/cookbook/integrations/gcp/bigquery/README.rst +++ b/cookbook/integrations/gcp/bigquery/README.rst @@ -19,5 +19,5 @@ This plugin is purely a spec. 
Since SQL is completely portable, there is no need Configuring the backend to get bigquery working ------------------------------------------------- +----------------------------------------------- - BigQuery plugins are `enabled in flytepropeller's config `_ diff --git a/cookbook/integrations/gcp/bigquery/bigquery.py b/cookbook/integrations/gcp/bigquery/bigquery.py index b76e412771..4552b9f17f 100644 --- a/cookbook/integrations/gcp/bigquery/bigquery.py +++ b/cookbook/integrations/gcp/bigquery/bigquery.py @@ -1,6 +1,6 @@ """ BigQuery Query -############ +############## This example shows how to use a Flyte BigQueryTask to execute a query. """ diff --git a/cookbook/integrations/kubernetes/k8s_spark/pyspark_pi.py b/cookbook/integrations/kubernetes/k8s_spark/pyspark_pi.py index e51a6343bd..77b9279dfe 100644 --- a/cookbook/integrations/kubernetes/k8s_spark/pyspark_pi.py +++ b/cookbook/integrations/kubernetes/k8s_spark/pyspark_pi.py @@ -14,7 +14,7 @@ #. Ensure you have ``flytekit>=0.16.0`` #. Enable Spark in backend, following the previous section. -#. Install the `flytekit spark plugin `_ :: +#. Install the `flytekit spark plugin `__ :: pip install flytekitplugins-spark diff --git a/cookbook/integrations/kubernetes/ray_example/ray_example.py b/cookbook/integrations/kubernetes/ray_example/ray_example.py index e6d8662064..880485f721 100644 --- a/cookbook/integrations/kubernetes/ray_example/ray_example.py +++ b/cookbook/integrations/kubernetes/ray_example/ray_example.py @@ -18,7 +18,7 @@ # %% # Ray Task -# ========= +# ======== # # We define a ray_example `remote function `__ that will be executed asynchronously in the Ray cluster. @ray.remote @@ -28,7 +28,7 @@ def f(x): # %% # Defining a Ray Config -# ==================== +# ===================== # # We create a HeadNodeConfig and WorkerNodeConfig for the Ray job, and these config will be used by Ray operator to launch a Ray cluster before running the task. # diff --git a/cookbook/larger_apps/larger_apps_deploy.py b/cookbook/larger_apps/larger_apps_deploy.py index abd4a17e1a..406d97a481 100644 --- a/cookbook/larger_apps/larger_apps_deploy.py +++ b/cookbook/larger_apps/larger_apps_deploy.py @@ -84,8 +84,8 @@ .. tip:: Recommended The ``flytekit-python-template`` ships with a helper script called `docker_build_and_tag.sh - `__ , which makes it - possible to build and image, tag it correctly and optionally use the git-SHA as the version. We recommend using + `__, + which makes it possible to build and image, tag it correctly and optionally use the git-SHA as the version. We recommend using such a script to track versions more effectively and using a CI/CD pipeline to deploy your code. .. prompt:: bash $ diff --git a/cookbook/larger_apps/larger_apps_iterate.py b/cookbook/larger_apps/larger_apps_iterate.py index b4d064fcd4..a8be1d1d8c 100644 --- a/cookbook/larger_apps/larger_apps_iterate.py +++ b/cookbook/larger_apps/larger_apps_iterate.py @@ -199,7 +199,7 @@ def my_wf(message: str) -> str: .. tip:: - Alternatively, visit `the Flyte sandbox console `__, + Alternatively, visit the Flyte sandbox console at ``http://localhost:30081/console/projects/flytesnacks/domains/development/workflows/flyte.workflows.example.my_wf``, click launch, and provide a ``message`` as input. .. 
TODO: update so that it reflects the new workflow with the message input https://raw.githubusercontent.com/flyteorg/static-resources/main/flyte/getting_started/getting_started_fastreg.gif diff --git a/cookbook/remote_access/remote_workflow.py b/cookbook/remote_access/remote_workflow.py index a7f1feaf5f..d619485b7d 100644 --- a/cookbook/remote_access/remote_workflow.py +++ b/cookbook/remote_access/remote_workflow.py @@ -8,9 +8,9 @@ automatically create a ``default launchplan`` for the workflow. A ``default launchplan`` has the same name as the workflow and all argument defaults are similar. See -:ref:`sphx_glr_auto_remote_access_remote_launchplan.py` to run a workflow via the default launchplan. +:ref:`Launch Plans` to run a workflow via the default launchplan. -:ref:`Tasks also can be executed ` using the launch command. +:ref:`Tasks also can be executed ` using the launch command. One difference between running a task and a workflow via launchplans is that launchplans cannot be associated with a task. This is to avoid triggers and scheduling.
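For instance, launching a registered workflow through its default launchplan from Python might look roughly like this (the workflow name, project, domain, and inputs are illustrative placeholders):

.. code-block:: python

    from flytekit.configuration import Config
    from flytekit.remote import FlyteRemote

    # Connect to the backend described by your Flyte config file.
    remote = FlyteRemote(
        config=Config.auto(),
        default_project="flytesnacks",
        default_domain="development",
    )

    # The default launchplan shares its name with the workflow.
    lp = remote.fetch_launch_plan(name="my.workflows.example_wf")
    execution = remote.execute(lp, inputs={"a": 5})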