From 0cf167e74c3839bb0c5cd6107a237f6bd39095dd Mon Sep 17 00:00:00 2001 From: Samhita Alla Date: Sun, 23 May 2021 14:39:34 +0530 Subject: [PATCH] Fix broken links (#224) * fix broken links Signed-off-by: Samhita Alla * fixed rst ref Signed-off-by: Samhita Alla * added rst ref for getting started link Signed-off-by: Samhita Alla --- .../house_price_prediction/README.rst | 2 +- .../core/containerization/multi_images.py | 4 +- cookbook/core/control_flow/map_task.py | 2 +- cookbook/core/extend_flyte/README.rst | 2 +- .../core/extend_flyte/custom_task_plugin.py | 2 +- .../core/extend_flyte/run_custom_types.py | 4 +- cookbook/deployment/cluster/README.rst | 52 ++++++++++++++++--- .../workflow/customizing_resources.py | 2 +- cookbook/integrations/aws/athena/README.rst | 7 +++ .../sagemaker_builtin_algo_training.py | 4 +- .../external_services/databricks/README.rst | 7 +++ .../external_services/snowflake/README.rst | 7 +++ .../flytekit_plugins/pandera/README.rst | 2 +- .../flytekit_plugins/papermilltasks/simple.py | 2 +- .../flytekit_plugins/sqlalchemy/README.rst | 7 +++ .../flytekit_plugins/sqllite3/README.rst | 7 +++ .../kubernetes/k8s_spark/dataframe_passing.py | 2 +- cookbook/type_system/typed_schema.py | 2 + 18 files changed, 94 insertions(+), 23 deletions(-) diff --git a/cookbook/case_studies/house_price_prediction/README.rst b/cookbook/case_studies/house_price_prediction/README.rst index 81b8ec9b1d..b1386b855b 100644 --- a/cookbook/case_studies/house_price_prediction/README.rst +++ b/cookbook/case_studies/house_price_prediction/README.rst @@ -13,7 +13,7 @@ Where does Flyte fit in? A typical house price prediction model isn’t dynamic, but a task has to be dynamic when multiple regions are involved. -To learn more about dynamic workflows, refer to `Write a dynamic task `__. +To learn more about dynamic workflows, refer to :std:doc:`auto/core/control_flow/dynamics`.
Dataset ======= diff --git a/cookbook/core/containerization/multi_images.py b/cookbook/core/containerization/multi_images.py index eeaec575d0..00482b8b1a 100644 --- a/cookbook/core/containerization/multi_images.py +++ b/cookbook/core/containerization/multi_images.py @@ -25,8 +25,8 @@ Flyte (Service) by default does not require that a Workflow is bound to a single Container Image. Flytekit offers a simple interface to easily alter the images that should be associated per task, yet keeping the local execution simple for the user. -For every :py:class:`flytekit.core.task.PythonFunctionTask` type task or simply a task that is decorated with the ``@task`` decorator, users can supply rules of how the container image should be bound. By default, Flytekit will associate one container image with all tasks. This image is called the ``default`` image. -To alter the image, users should use the ``container_image`` parameter available in the :py:func:`flytekit.task` decorator. Either one of the following is acceptable: +For every :py:class:`flytekit.PythonFunctionTask` type task or simply a task that is decorated with the ``@task`` decorator, users can supply rules of how the container image should be bound. By default, flytekit will associate one container image with all tasks. This image is called the ``default`` image. +To alter the image, users should use the ``container_image`` parameter available in the :py:func:`flytekit.task` decorator. Any one of the following is an acceptable #. Image reference is specified, but the version is derived from the default images version ``container_image="docker.io/redis:{{.image.default.version}},`` #. 
Both the FQN and the version are derived from the default image ``container_image="{{.image.default.fqn}}:spark-{{.image.default.version}},`` diff --git a/cookbook/core/control_flow/map_task.py b/cookbook/core/control_flow/map_task.py index c9813ccdb6..9b05e3e05f 100644 --- a/cookbook/core/control_flow/map_task.py +++ b/cookbook/core/control_flow/map_task.py @@ -31,7 +31,7 @@ def coalesce(b: typing.List[str]) -> str: # %% -# To use a map task in your workflow, use the :py:func:`flytekit:flytekit.map_task` function and pass in an individual +# To use a map task in your workflow, use the :py:func:`flytekit:flytekit.core.map_task` function and pass in an individual # task to be repeated across a collection of inputs. In this case the type of a, ``typing.List[int]`` is a list of the # input type defined for ``a_mappable_task``. @workflow diff --git a/cookbook/core/extend_flyte/README.rst b/cookbook/core/extend_flyte/README.rst index 40dcfea368..452a674c97 100644 --- a/cookbook/core/extend_flyte/README.rst +++ b/cookbook/core/extend_flyte/README.rst @@ -20,7 +20,7 @@ Case 1: I want to use my special Types - e.g. my own DataFrame format Flyte, just like a programming language has a core type-system, but just like most languages, this type system can be extended by allowing users to add ``User defined Data types``. A User defined data type can be something that Flyte does not really understand, but is extremely useful for a users specific needs. For example it can be a custom user structure or a grouping of images in a specific encoding. -Flytekit natively supports handling of structured data like User defined structures like DataClasses using JSON as the representation format. An example of this is available in FlyteCookbook - :std:doc:`auto_core_intermediate/custom_objects`. +Flytekit natively supports handling of structured data like User defined structures like DataClasses using JSON as the representation format. 
An example of this is available in FlyteCookbook - :std:doc:`auto/type_system/custom_objects`. For types that are not simply representable as JSON documents, Flytekit allows users to extends Flyte's type system and implement these types in Python. The user has to essentially implement a :py:class:`flytekit.extend.TypeTransformer` class to enable translation of the type from Users type to flyte understood types. As an example, instead of using :py:class:`pandas.DataFrame` directly, you may want to use `Pandera `_ to perform validation of an input or output dataframe. an example can be found `here `_. diff --git a/cookbook/core/extend_flyte/custom_task_plugin.py b/cookbook/core/extend_flyte/custom_task_plugin.py index ed7ddee8db..b75197b20a 100644 --- a/cookbook/core/extend_flyte/custom_task_plugin.py +++ b/cookbook/core/extend_flyte/custom_task_plugin.py @@ -45,7 +45,7 @@ def wait_and_run(path: str) -> int: # Plugin Structure # ^^^^^^^^^^^^^^^^^ # As illustrated above to achieve this structure we need to create a Class called ``WaitForObjectStoreFile``, which -# derives from :py:class:`flytekit.core.PythonFunctionTask` as follows. +# derives from :py:class:`flytekit.PythonFunctionTask` as follows. # class WaitForObjectStoreFile(PythonTask): """ diff --git a/cookbook/core/extend_flyte/run_custom_types.py b/cookbook/core/extend_flyte/run_custom_types.py index 44bd546d15..ec145b0777 100644 --- a/cookbook/core/extend_flyte/run_custom_types.py +++ b/cookbook/core/extend_flyte/run_custom_types.py @@ -67,7 +67,7 @@ def new_file(self, name: str) -> str: # ``MyDataset`` represents a set of files locally, but, when a workflow consists of multiple steps, we want the data to # flow between the different steps. To achieve this, it is necessary to explain how the data will be transformed to # Flyte's remote references. 
To do this, we create a new instance of -# :py:class:`flytekit.core.type_engine.TypeTransformer`, for the type ``MyDataset`` as follows +# :py:class:`flytekit.extend.TypeTransformer`, for the type ``MyDataset`` as follows # # .. note:: # @@ -125,7 +125,7 @@ def to_python_value( # %% # Before we can use MyDataset in our tasks, we need to let flytekit know that ``MyDataset`` should be considered as a -# valid type. This is done using the :py:func:`flytekit.core.type_engine.TypeEngine.register` function. +# valid type. This is done using the :py:func:`flytekit.extend.TypeEngine.register` function. TypeEngine.register(MyDatasetTransformer()) diff --git a/cookbook/deployment/cluster/README.rst b/cookbook/deployment/cluster/README.rst index 7b658f6589..58e8c1cf6a 100644 --- a/cookbook/deployment/cluster/README.rst +++ b/cookbook/deployment/cluster/README.rst @@ -6,21 +6,57 @@ Locally, Flytekit relies on the Python interpreter to execute both tasks and wor To leverage the full power of Flyte, we recommend using a deployed backend of Flyte. Flyte can be run on any Kubernetes cluster - a local cluster like `kind `__, in a cloud environment or on-prem. -Please refer to the `Installing Flyte `__ for details on getting started with a Flyte installation. - +Please refer to the :doc:`Getting Started ` for details on getting started with the Flyte installation. 1. First commit your changes. Some of the steps below default to referencing the git sha. -1. Run `make serialize`. This will build the image tagged with just `flytecookbook:`, no registry will be prefixed. See the image building section below for additional information. +2. Run `make serialize`. This will build the image tagged with just `flytecookbook:`, no registry will be prefixed. See the image building section below for additional information. Build your Dockerfile ^^^^^^^^^^^^^^^^^^^^^^ The first step of this process is building a container image that holds your code. -.. 
literalinclude:: ../../Dockerfile - :language: dockerfile - :emphasize-lines: 1 - :linenos: +.. code-block:: docker + :emphasize-lines: 1 + :linenos: + + FROM python:3.8-slim-buster + LABEL org.opencontainers.image.source https://github.com/flyteorg/flytesnacks + + WORKDIR /root + ENV VENV /opt/venv + ENV LANG C.UTF-8 + ENV LC_ALL C.UTF-8 + ENV PYTHONPATH /root + + # This is necessary for opencv to work + RUN apt-get update && apt-get install -y libsm6 libxext6 libxrender-dev ffmpeg build-essential + + # Install the AWS cli separately to prevent issues with boto being written over + RUN pip3 install awscli + + ENV VENV /opt/venv + # Virtual environment + RUN python3 -m venv ${VENV} + ENV PATH="${VENV}/bin:$PATH" + + # Install Python dependencies + COPY core/requirements.txt /root + RUN pip install -r /root/requirements.txt + + # Copy the makefile targets to expose on the container. This makes it easier to register + COPY in_container.mk /root/Makefile + COPY core/sandbox.config /root + + # Copy the actual code + COPY core /root/core + + # This tag is supplied by the build script and will be used to determine the version + # when registering tasks, workflows, and launch plans + ARG tag + ENV FLYTE_INTERNAL_IMAGE $tag +.. note:: + ``core`` is the directory being considered in the above Dockerfile. Serialize your workflows and tasks ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -50,7 +86,7 @@ In-container serialization Notice that the commands above are run locally, _not_ inside the container. Strictly speaking, to be rigourous, serialization should be done within the container for the following reasons. 1. It ensures that the versions of all libraries used at execution time on the Flyte platform, are the same that are used during serialization. -1. Since serialization runs part of flytekit, it helps ensure that your container is set up correctly. +2. Since serialization runs part of flytekit, it helps ensure that your container is set up correctly. 
Take a look at this make target to see how it's done. .. code-block:: diff --git a/cookbook/deployment/workflow/customizing_resources.py b/cookbook/deployment/workflow/customizing_resources.py index c3199e6d13..d220993fca 100644 --- a/cookbook/deployment/workflow/customizing_resources.py +++ b/cookbook/deployment/workflow/customizing_resources.py @@ -17,7 +17,7 @@ # Requests are treated as hints which are used to schedule tasks on nodes with available resources, whereas limits # are hard constraints. # -# For either a request or limit, refer to the :py:class:`flytekit:flytekit.Resource` documentation. +# For either a request or limit, refer to the :py:class:`flytekit:flytekit.Resources` documentation. # # The following attributes can be specified for a ``Resource``. # diff --git a/cookbook/integrations/aws/athena/README.rst b/cookbook/integrations/aws/athena/README.rst index e69de29bb2..5b6010e1fa 100644 --- a/cookbook/integrations/aws/athena/README.rst +++ b/cookbook/integrations/aws/athena/README.rst @@ -0,0 +1,7 @@ +###### +Athena +###### + +.. NOTE:: + + Coming soon 🛠 \ No newline at end of file diff --git a/cookbook/integrations/aws/sagemaker_training/sagemaker_builtin_algo_training.py b/cookbook/integrations/aws/sagemaker_training/sagemaker_builtin_algo_training.py index 707511eb28..5b8da2609b 100644 --- a/cookbook/integrations/aws/sagemaker_training/sagemaker_builtin_algo_training.py +++ b/cookbook/integrations/aws/sagemaker_training/sagemaker_builtin_algo_training.py @@ -8,10 +8,8 @@ Defining a XGBoost Training job -------------------------------- We will create a job that will train an XGBoost model using the prebuilt algorithms @Sagemaker. -Refer to `Sagemaker XGBoost docs here `_ +Refer to `Sagemaker XGBoost docs here `_. To understand more about XGBoost refer `here `_.
-To dive deeper into the Flytekit API refer to -`docs `_ """ import typing diff --git a/cookbook/integrations/external_services/databricks/README.rst b/cookbook/integrations/external_services/databricks/README.rst index e69de29bb2..47aeb0e172 100644 --- a/cookbook/integrations/external_services/databricks/README.rst +++ b/cookbook/integrations/external_services/databricks/README.rst @@ -0,0 +1,7 @@ +########## +Databricks +########## + +.. NOTE:: + + Coming soon 🛠 \ No newline at end of file diff --git a/cookbook/integrations/external_services/snowflake/README.rst b/cookbook/integrations/external_services/snowflake/README.rst index e69de29bb2..7172cba418 100644 --- a/cookbook/integrations/external_services/snowflake/README.rst +++ b/cookbook/integrations/external_services/snowflake/README.rst @@ -0,0 +1,7 @@ +######### +Snowflake +######### + +.. NOTE:: + + Coming soon 🛠 \ No newline at end of file diff --git a/cookbook/integrations/flytekit_plugins/pandera/README.rst b/cookbook/integrations/flytekit_plugins/pandera/README.rst index 3972b54bfe..c8ade5a7e2 100644 --- a/cookbook/integrations/flytekit_plugins/pandera/README.rst +++ b/cookbook/integrations/flytekit_plugins/pandera/README.rst @@ -2,7 +2,7 @@ Pandera: Type Annotations for Pandas DataFrames =============================================== Flytekit python natively supports :ref:`many data types `, -including a :ref:`FlyteSchema ` type for +including a :ref:`FlyteSchema ` type for type-annotating pandas dataframes. The flytekit pandera plugin provides an alternative for defining dataframe schemas by integrating with `pandera `_, which is a runtime data validation tool for pandas dataframes.
diff --git a/cookbook/integrations/flytekit_plugins/papermilltasks/simple.py b/cookbook/integrations/flytekit_plugins/papermilltasks/simple.py index c83a8e8ac7..178e86ef08 100644 --- a/cookbook/integrations/flytekit_plugins/papermilltasks/simple.py +++ b/cookbook/integrations/flytekit_plugins/papermilltasks/simple.py @@ -26,7 +26,7 @@ # .. image:: https://raw.githubusercontent.com/flyteorg/flyte/static-resources/img/papermilltasks/outputs.png # :alt: Example of "parameters tag" added to the cell with input variables # -# #. Now in a python file, you have to create a new task at the ``module`` level. Refer to :py:class:`NotebookTask` +# #. Now in a python file, you have to create a new task at the ``module`` level. # The example task is shown below nb = NotebookTask( name="simple-nb", diff --git a/cookbook/integrations/flytekit_plugins/sqlalchemy/README.rst b/cookbook/integrations/flytekit_plugins/sqlalchemy/README.rst index e69de29bb2..ef49600d95 100644 --- a/cookbook/integrations/flytekit_plugins/sqlalchemy/README.rst +++ b/cookbook/integrations/flytekit_plugins/sqlalchemy/README.rst @@ -0,0 +1,7 @@ +########### +SQL Alchemy +########### + +.. NOTE:: + + Coming soon 🛠 \ No newline at end of file diff --git a/cookbook/integrations/flytekit_plugins/sqllite3/README.rst b/cookbook/integrations/flytekit_plugins/sqllite3/README.rst index e69de29bb2..cbc7644ec1 100644 --- a/cookbook/integrations/flytekit_plugins/sqllite3/README.rst +++ b/cookbook/integrations/flytekit_plugins/sqllite3/README.rst @@ -0,0 +1,7 @@ +######## +SQLlite3 +######## + +..
NOTE:: + + Coming soon 🛠 \ No newline at end of file diff --git a/cookbook/integrations/kubernetes/k8s_spark/dataframe_passing.py b/cookbook/integrations/kubernetes/k8s_spark/dataframe_passing.py index e5929677eb..e384bf0b25 100644 --- a/cookbook/integrations/kubernetes/k8s_spark/dataframe_passing.py +++ b/cookbook/integrations/kubernetes/k8s_spark/dataframe_passing.py @@ -55,7 +55,7 @@ def create_spark_df() -> my_schema: # The task ``sum_of_all_ages`` receives a parameter of type :any:`df_my_schema_definition`. It is important to note that there is no # expectation that the schema is a pandas dataframe or a spark dataframe, but just a generic schema object. The Flytekit schema object # can be read into multiple formats using the ``open()`` method. Default conversion is to :py:class:`pandas.DataFrame` -# Refer to :py:class:`flytekit.FlyteSchema` for more details +# Refer to :py:class:`flytekit.types.schema.FlyteSchema` for more details # @task(cache_version="1") def sum_of_all_ages(s: my_schema) -> int: diff --git a/cookbook/type_system/typed_schema.py b/cookbook/type_system/typed_schema.py index 71f7d6a916..0dd163ca2d 100644 --- a/cookbook/type_system/typed_schema.py +++ b/cookbook/type_system/typed_schema.py @@ -1,4 +1,6 @@ """ +.. _typed_schema: + Typed columns in a schema ------------------------------