From 63e580cf02f9a9439fadd813b11d670d1942470c Mon Sep 17 00:00:00 2001 From: Kevin Su Date: Tue, 10 Sep 2024 00:46:59 -0700 Subject: [PATCH 01/28] Fix pandera example (#1731) Fix pandera exampel Signed-off-by: Kevin Su --- examples/pandera_plugin/pandera_plugin/basic_schema_example.py | 2 +- .../pandera_plugin/validating_and_testing_ml_pipelines.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/pandera_plugin/pandera_plugin/basic_schema_example.py b/examples/pandera_plugin/pandera_plugin/basic_schema_example.py index 8c8d36b2a..adc164f4d 100644 --- a/examples/pandera_plugin/pandera_plugin/basic_schema_example.py +++ b/examples/pandera_plugin/pandera_plugin/basic_schema_example.py @@ -15,7 +15,7 @@ from flytekit import ImageSpec, task, workflow from pandera.typing import DataFrame, Series -custom_image = ImageSpec(registry="ghcr.io/flyteorg", packages=["flytekitplugins-pandera", "scikit-learn"]) +custom_image = ImageSpec(registry="ghcr.io/flyteorg", packages=["flytekitplugins-pandera", "scikit-learn", "pyarrow"]) # %% [markdown] # ## A Simple Data Processing Pipeline diff --git a/examples/pandera_plugin/pandera_plugin/validating_and_testing_ml_pipelines.py b/examples/pandera_plugin/pandera_plugin/validating_and_testing_ml_pipelines.py index 85fdef648..2cb649312 100644 --- a/examples/pandera_plugin/pandera_plugin/validating_and_testing_ml_pipelines.py +++ b/examples/pandera_plugin/pandera_plugin/validating_and_testing_ml_pipelines.py @@ -52,7 +52,7 @@ from sklearn.ensemble import RandomForestClassifier from sklearn.metrics import accuracy_score -custom_image = ImageSpec(registry="ghcr.io/flyteorg", packages=["flytekitplugins-pandera", "scikit-learn"]) +custom_image = ImageSpec(registry="ghcr.io/flyteorg", packages=["flytekitplugins-pandera", "scikit-learn", "pyarrow"]) # %% [markdown] # We also need to import the `pandera` flytekit plugin to enable dataframe runtime type-checking: From 1b4d46d04f3d11b8d70e78a421719abcd4887756 Mon Sep 17 
00:00:00 2001 From: Kevin Su Date: Fri, 13 Sep 2024 18:15:20 -0700 Subject: [PATCH 02/28] unpin whylogs (#1732) Signed-off-by: Kevin Su --- .github/workflows/checks.yml | 6 +++--- .github/workflows/serialize_example.yml | 4 ++-- examples/whylogs_plugin/requirements.in | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/checks.yml b/.github/workflows/checks.yml index 9c7b9fb10..ead9fa466 100644 --- a/.github/workflows/checks.yml +++ b/.github/workflows/checks.yml @@ -136,7 +136,7 @@ jobs: --force tar -xvf flyte-package.tgz - name: Upload artifacts - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v4 with: name: snacks-examples-${{ matrix.example }} path: examples/${{ matrix.example }}/**/*.pb @@ -155,7 +155,7 @@ jobs: run: | mkdir download-artifact - name: Download artifacts - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v4 with: path: ./download-artifact/ - name: setup sandbox @@ -216,7 +216,7 @@ jobs: run: | mkdir download-artifact - name: Download artifacts - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v4 with: path: ./download-artifact/ - name: Package Examples diff --git a/.github/workflows/serialize_example.yml b/.github/workflows/serialize_example.yml index 4d1f38c78..8b5442532 100644 --- a/.github/workflows/serialize_example.yml +++ b/.github/workflows/serialize_example.yml @@ -38,7 +38,7 @@ jobs: ./scripts/serialize-example.sh ${{ matrix.directory }} ${{ github.sha }} tar -xvf ${{ matrix.directory }}/flyte-package.tgz -C ${{ matrix.directory }} - name: Upload artifacts - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v4 with: name: snacks-${{ steps.example_id.outputs.EXAMPLE_ID }} path: ${{ matrix.directory }}/**/*.pb @@ -57,7 +57,7 @@ jobs: run: | mkdir download-artifact - name: Download artifacts - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v4 with: path: ./download-artifact/ - name: setup sandbox diff --git 
a/examples/whylogs_plugin/requirements.in b/examples/whylogs_plugin/requirements.in index 0091e81e3..3f8bc07aa 100644 --- a/examples/whylogs_plugin/requirements.in +++ b/examples/whylogs_plugin/requirements.in @@ -4,6 +4,6 @@ matplotlib flytekitplugins-deck-standard flytekitplugins-whylogs>=1.1.1b0 scikit-learn -whylogs[s3]==1.3.30 +whylogs[s3] whylogs[mlflow] whylogs[whylabs] From a94aa970b1e74f44acc498dff777410355867cfe Mon Sep 17 00:00:00 2001 From: Samhita Alla Date: Thu, 19 Sep 2024 19:39:44 +0530 Subject: [PATCH 03/28] add ollama docs (#1722) * add ollama docs Signed-off-by: Samhita Alla * update doc Signed-off-by: Samhita Alla * update index Signed-off-by: Samhita Alla * add system prompt as task input Signed-off-by: Samhita Alla * add user prompt Signed-off-by: Samhita Alla * set gpu to 1 in ollama initialization Signed-off-by: Samhita Alla * remove modelfile Signed-off-by: Samhita Alla * bump version Signed-off-by: Samhita Alla * fix code Signed-off-by: Samhita Alla --------- Signed-off-by: Samhita Alla --- docs/index.md | 1 + docs/integrations.md | 2 + examples/nim_plugin/README.md | 2 +- examples/ollama_plugin/Dockerfile | 23 +++++ examples/ollama_plugin/README.md | 36 +++++++ .../ollama_plugin/ollama_plugin/__init__.py | 0 .../ollama_plugin/ollama_plugin/serve_llm.py | 99 +++++++++++++++++++ examples/ollama_plugin/requirements.in | 1 + 8 files changed, 163 insertions(+), 1 deletion(-) create mode 100644 examples/ollama_plugin/Dockerfile create mode 100644 examples/ollama_plugin/README.md create mode 100644 examples/ollama_plugin/ollama_plugin/__init__.py create mode 100644 examples/ollama_plugin/ollama_plugin/serve_llm.py create mode 100644 examples/ollama_plugin/requirements.in diff --git a/docs/index.md b/docs/index.md index a67bfb480..027058e5c 100644 --- a/docs/index.md +++ b/docs/index.md @@ -120,6 +120,7 @@ auto_examples/mmcloud_agent/index auto_examples/modin_plugin/index auto_examples/kfmpi_plugin/index auto_examples/nim_plugin/index 
+auto_examples/ollama_plugin/index auto_examples/onnx_plugin/index auto_examples/openai_batch_agent/index auto_examples/papermill_plugin/index diff --git a/docs/integrations.md b/docs/integrations.md index f4ff3e08f..b763017ad 100644 --- a/docs/integrations.md +++ b/docs/integrations.md @@ -44,6 +44,8 @@ Flytekit functionality. These plugins can be anything and for comparison can be - `wandb`: Machine learning platform to build better models faster. * - {doc}`NIM ` - Serve optimized model containers with NIM. +* - {doc}`Ollama ` + - Serve fine-tuned LLMs with Ollama in a Flyte workflow. ``` :::{dropdown} {fa}`info-circle` Using flytekit plugins diff --git a/examples/nim_plugin/README.md b/examples/nim_plugin/README.md index 506c9eab9..36011695b 100644 --- a/examples/nim_plugin/README.md +++ b/examples/nim_plugin/README.md @@ -29,7 +29,7 @@ pip install flytekitplugins-inference For a usage example, see {doc}`NIM example usage `. ```{note} -NIM can only be run in a Flyte cluster, not locally, as it must be deployed as a sidecar service in a Kubernetes pod. +NIM can only be run in a Flyte cluster as it must be deployed as a sidecar service in a Kubernetes pod. ``` ```{toctree} diff --git a/examples/ollama_plugin/Dockerfile b/examples/ollama_plugin/Dockerfile new file mode 100644 index 000000000..0c46be23a --- /dev/null +++ b/examples/ollama_plugin/Dockerfile @@ -0,0 +1,23 @@ +# ###################### +# NOTE: For CI/CD only # +######################## +FROM python:3.11-slim-buster +LABEL org.opencontainers.image.source=https://github.com/flyteorg/flytesnacks + +WORKDIR /root +ENV VENV /opt/venv +ENV LANG C.UTF-8 +ENV LC_ALL C.UTF-8 +ENV PYTHONPATH /root + +# Install Python dependencies +COPY requirements.in /root +RUN pip install -r /root/requirements.in + +# Copy the actual code +COPY . 
/root/ + +# This tag is supplied by the build script and will be used to determine the version +# when registering tasks, workflows, and launch plans +ARG tag +ENV FLYTE_INTERNAL_IMAGE $tag diff --git a/examples/ollama_plugin/README.md b/examples/ollama_plugin/README.md new file mode 100644 index 000000000..75b97f0fb --- /dev/null +++ b/examples/ollama_plugin/README.md @@ -0,0 +1,36 @@ +(ollama_plugin)= + +# Ollama + +```{eval-rst} +.. tags:: Inference, LLM +``` + +Serve large language models (LLMs) in a Flyte task. + +[Ollama](https://ollama.com/) simplifies the process of serving fine-tuned LLMs. +Whether you're generating predictions from a customized model or deploying it across different hardware setups, +Ollama enables you to encapsulate the entire workflow in a single pipeline. + +## Installation + +To use the Ollama plugin, run the following command: + +``` +pip install flytekitplugins-inference +``` + +## Example usage + +For a usage example, see {doc}`Ollama example usage `. + +```{note} +Ollama can only be run in a Flyte cluster as it must be deployed as a sidecar service in a Kubernetes pod. +``` + +```{toctree} +:maxdepth: -1 +:hidden: + +serve_llm +``` diff --git a/examples/ollama_plugin/ollama_plugin/__init__.py b/examples/ollama_plugin/ollama_plugin/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/examples/ollama_plugin/ollama_plugin/serve_llm.py b/examples/ollama_plugin/ollama_plugin/serve_llm.py new file mode 100644 index 000000000..f96ef7252 --- /dev/null +++ b/examples/ollama_plugin/ollama_plugin/serve_llm.py @@ -0,0 +1,99 @@ +# %% [markdown] +# (serve_llm)= +# +# # Serve LLMs with Ollama +# +# In this guide, you'll learn how to locally serve Gemma2 and fine-tuned Llama3 models using Ollama within a Flyte task. +# +# Start by importing Ollama from the `flytekitplugins.inference` package and specifying the desired model name. 
+# +# Below is a straightforward example of serving a Gemma2 model: +# %% +from flytekit import ImageSpec, Resources, task +from flytekit.extras.accelerators import A10G +from flytekitplugins.inference import Model, Ollama +from openai import OpenAI + +image = ImageSpec( + name="ollama_serve", + registry="ghcr.io/flyteorg", + packages=["flytekitplugins-inference"], + builder="default", +) + +ollama_instance = Ollama(model=Model(name="gemma2"), gpu="1") + + +@task( + container_image=image, + pod_template=ollama_instance.pod_template, + accelerator=A10G, + requests=Resources(gpu="0"), +) +def model_serving(user_prompt: str) -> str: + client = OpenAI(base_url=f"{ollama_instance.base_url}/v1", api_key="ollama") # api key required but ignored + + completion = client.chat.completions.create( + model="gemma2", + messages=[ + { + "role": "user", + "content": user_prompt, + } + ], + temperature=0.5, + top_p=1, + max_tokens=1024, + ) + + return completion.choices[0].message.content + + +# %% [markdown] +# :::{important} +# Replace `ghcr.io/flyteorg` with a container registry to which you can publish. +# To upload the image to the local registry in the demo cluster, indicate the registry as `localhost:30000`. +# ::: +# +# The `model_serving` task initiates a sidecar service to serve the model, making it accessible on localhost via the `base_url` property. +# You can use either the chat or chat completion endpoints. +# +# By default, Ollama initializes the server with `cpu`, `gpu`, and `mem` set to `1`, `1`, and `15Gi`, respectively. +# You can adjust these settings to meet your requirements. +# +# To serve a fine-tuned model, provide the model configuration as `modelfile` within the `Model` dataclass. 
+# +# Below is an example of specifying a fine-tuned LoRA adapter for a Llama3 Mario model: +# %% +from flytekit.types.file import FlyteFile + +finetuned_ollama_instance = Ollama( + model=Model( + name="llama3-mario", + modelfile="FROM llama3\nADAPTER {inputs.ggml}\nPARAMETER temperature 1\nPARAMETER num_ctx 4096\nSYSTEM {inputs.system_prompt}", + ), + gpu="1", +) + + +@task( + container_image=image, + pod_template=finetuned_ollama_instance.pod_template, + accelerator=A10G, + requests=Resources(gpu="0"), +) +def finetuned_model_serving(ggml: FlyteFile, system_prompt: str): + ... + + +# %% [markdown] +# `{inputs.ggml}` and `{inputs.system_prompt}` are materialized at run time, with `ggml` and `system_prompt` available as inputs to the task. +# +# Ollama models can be integrated into different stages of your AI workflow, including data pre-processing, +# model inference, and post-processing. Flyte also allows serving multiple Ollama models simultaneously +# on various instances. +# +# This integration enables you to self-host and serve AI models on your own infrastructure, +# ensuring full control over costs and data security. +# +# For more detailed information on the models natively supported by Ollama, visit the [Ollama models library](https://ollama.com/library). 
diff --git a/examples/ollama_plugin/requirements.in b/examples/ollama_plugin/requirements.in new file mode 100644 index 000000000..a4a684ce6 --- /dev/null +++ b/examples/ollama_plugin/requirements.in @@ -0,0 +1 @@ +flytekitplugins-inference>=1.13.6b1 From b7b0f7a4248e175993541b9eba1d0720be5cebf8 Mon Sep 17 00:00:00 2001 From: Future-Outlier Date: Tue, 24 Sep 2024 15:40:57 +0800 Subject: [PATCH 04/28] Update Databricks agent example to V2 (#1733) * use V2 instead Signed-off-by: Future-Outlier * Update examples/databricks_agent/databricks_agent/databricks_agent_example_usage.py Co-authored-by: Kevin Su Signed-off-by: Future-Outlier * Update examples/databricks_agent/databricks_agent/databricks_agent_example_usage.py Co-authored-by: Kevin Su Signed-off-by: Future-Outlier --------- Signed-off-by: Future-Outlier Co-authored-by: Kevin Su --- .../databricks_agent/databricks_agent_example_usage.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/databricks_agent/databricks_agent/databricks_agent_example_usage.py b/examples/databricks_agent/databricks_agent/databricks_agent_example_usage.py index 9de652bc3..1fe9d3ff9 100644 --- a/examples/databricks_agent/databricks_agent/databricks_agent_example_usage.py +++ b/examples/databricks_agent/databricks_agent/databricks_agent_example_usage.py @@ -11,7 +11,7 @@ import flytekit from flytekit import Resources, task, workflow -from flytekitplugins.spark import Databricks +from flytekitplugins.spark import DatabricksV2 as Databricks # %% [markdown] From a1dde1984ecd15d18d30e4db3afa62579a580978 Mon Sep 17 00:00:00 2001 From: Future-Outlier Date: Fri, 27 Sep 2024 02:18:45 +0800 Subject: [PATCH 05/28] Update FlyteDeck Example (#1734) * use V2 instead Signed-off-by: Future-Outlier * update flyte deck example Signed-off-by: Future-Outlier * nit Signed-off-by: Future-Outlier --------- Signed-off-by: Future-Outlier --- .../development_lifecycle/decks.py | 24 +++++++++++++------ 1 file changed, 17 insertions(+), 7 
deletions(-) diff --git a/examples/development_lifecycle/development_lifecycle/decks.py b/examples/development_lifecycle/development_lifecycle/decks.py index d849bd339..50cf4daf2 100644 --- a/examples/development_lifecycle/development_lifecycle/decks.py +++ b/examples/development_lifecycle/development_lifecycle/decks.py @@ -13,7 +13,17 @@ # https://docs.flyte.org/en/latest/user_guide/customizing_dependencies/imagespec.html#image-spec-example custom_image = ImageSpec( - packages=["plotly", "scikit-learn", "flytekitplugins-deck-standard"], registry="ghcr.io/flyteorg" + packages=[ + "flytekitplugins-deck-standard", + "markdown", + "pandas", + "pillow", + "plotly", + "pyarrow", + "scikit-learn", + "ydata_profiling", + ], + registry="ghcr.io/flyteorg", ) if custom_image.is_container(): @@ -47,7 +57,7 @@ def pca_plot(): from flytekitplugins.deck.renderer import FrameProfilingRenderer -@task(enable_deck=True) +@task(enable_deck=True, container_image=custom_image) def frame_renderer() -> None: df = pd.DataFrame(data={"col1": [1, 2], "col2": [3, 4]}) flytekit.Deck("Frame Renderer", FrameProfilingRenderer().to_html(df=df)) @@ -61,7 +71,7 @@ def frame_renderer() -> None: from flytekit.deck import TopFrameRenderer -@task(enable_deck=True) +@task(enable_deck=True, container_image=custom_image) def top_frame_renderer() -> Annotated[pd.DataFrame, TopFrameRenderer(1)]: return pd.DataFrame(data={"col1": [1, 2], "col2": [3, 4]}) @@ -70,7 +80,7 @@ def top_frame_renderer() -> Annotated[pd.DataFrame, TopFrameRenderer(1)]: # producing HTML as a Unicode string. 
-@task(enable_deck=True) +@task(enable_deck=True, container_image=custom_image) def markdown_renderer() -> None: flytekit.current_context().default_deck.append( MarkdownRenderer().to_html("You can install flytekit using this command: ```import flytekit```") @@ -87,7 +97,7 @@ def markdown_renderer() -> None: from flytekitplugins.deck.renderer import BoxRenderer -@task(enable_deck=True) +@task(enable_deck=True, container_image=custom_image) def box_renderer() -> None: iris_df = px.data.iris() flytekit.Deck("Box Plot", BoxRenderer("sepal_length").to_html(iris_df)) @@ -101,7 +111,7 @@ def box_renderer() -> None: from flytekitplugins.deck.renderer import ImageRenderer -@task(enable_deck=True) +@task(enable_deck=True, container_image=custom_image) def image_renderer(image: FlyteFile) -> None: flytekit.Deck("Image Renderer", ImageRenderer().to_html(image_src=image)) @@ -117,7 +127,7 @@ def image_renderer_wf( from flytekitplugins.deck.renderer import TableRenderer -@task(enable_deck=True) +@task(enable_deck=True, container_image=custom_image) def table_renderer() -> None: flytekit.Deck( "Table Renderer", From caadd269af894e50b88c911bbda38ffeaf0a1bb5 Mon Sep 17 00:00:00 2001 From: Nikki Everett Date: Fri, 4 Oct 2024 13:23:34 -0500 Subject: [PATCH 06/28] Updates for Flyte docs overhaul (#1726) * moved to flyte repo Signed-off-by: nikki everett * update refs Signed-off-by: nikki everett * update refs in examples Signed-off-by: nikki everett * configure pydata theme Signed-off-by: nikki everett * restructure integrations docs Signed-off-by: nikki everett * restructure tutorials Signed-off-by: nikki everett * fix contributing guide link Signed-off-by: nikki everett * remove double h1 Signed-off-by: nikki everett * remove unneceesary captions Signed-off-by: nikki everett * move deprecated integrations back to flytesnacks and remove unnecessary captions from tutorials and integrations toctrees Signed-off-by: nikki everett * fix regression Signed-off-by: nikki everett * linting 
Signed-off-by: nikki everett * update docs-requirements.txt from new docs-requirements.in Signed-off-by: nikki everett * update README Signed-off-by: nikki everett * Regenerate all requirements files using python 3.9 Signed-off-by: Eduardo Apolinario * Regenerate readthedocs requirements files using python 3.12 Signed-off-by: Eduardo Apolinario * fix formatting for deprecated plugins Signed-off-by: nikki everett * Add pandas to requirements.in in a bunch of places Signed-off-by: Eduardo Apolinario * Install pandas in e2e-tests Signed-off-by: Eduardo Apolinario * use ref for docs contributing guide Signed-off-by: nikki everett --------- Signed-off-by: nikki everett Signed-off-by: Eduardo Apolinario Co-authored-by: Eduardo Apolinario --- .github/workflows/checks.yml | 2 +- .readthedocs.yml | 2 +- README.md | 14 +- dev-requirements.txt | 307 +++++----- docs-requirements.in | 7 +- docs-requirements.txt | 531 ++++++------------ docs/conf.py | 12 +- docs/contribute.md | 359 ------------ docs/index.md | 139 +---- .../deprecated_integrations/index.md | 10 + .../index.md} | 114 ++-- .../bioinformatics/index.md} | 4 +- .../feature_engineering/index.md} | 10 +- .../flytelab/index.md} | 0 .../flytelab}/weather_forecasting.md | 0 docs/{tutorials.md => tutorials/index.md} | 35 +- .../model_training/index.md} | 22 +- .../airflow_plugin/airflow_plugin/airflow.py | 2 +- examples/bigquery_agent/requirements.in | 1 + examples/bigquery_plugin/README.md | 13 +- .../bigquery_plugin_example.py | 6 + examples/bigquery_plugin/requirements.in | 1 + examples/blast/requirements.in | 1 + examples/databricks_plugin/README.md | 13 +- .../databricks_plugin_example.py | 6 + .../development_lifecycle/requirements.in | 1 + examples/house_price_prediction/README.md | 2 +- .../house_price_prediction/requirements.in | 1 + examples/k8s_dask_plugin/README.md | 2 +- examples/k8s_pod_plugin/README.md | 2 +- examples/k8s_spark_plugin/README.md | 4 +- examples/kfpytorch_plugin/README.md | 2 +- 
examples/kftensorflow_plugin/README.md | 2 +- .../mmcloud_agent_example_usage.py | 2 +- examples/pima_diabetes/requirements.in | 1 + examples/ray_plugin/README.md | 2 +- examples/snowflake_plugin/README.md | 13 +- examples/snowflake_plugin/snowflake_plugin.md | 2 +- .../snowflake_plugin_example.py | 5 + examples/whylogs_plugin/requirements.in | 1 + 40 files changed, 550 insertions(+), 1103 deletions(-) delete mode 100644 docs/contribute.md create mode 100644 docs/integrations/deprecated_integrations/index.md rename docs/{integrations.md => integrations/index.md} (63%) rename docs/{bioinformatics_examples.md => tutorials/bioinformatics/index.md} (81%) rename docs/{feature_engineering.md => tutorials/feature_engineering/index.md} (62%) rename docs/{flyte_lab.md => tutorials/flytelab/index.md} (100%) rename docs/{ => tutorials/flytelab}/weather_forecasting.md (100%) rename docs/{tutorials.md => tutorials/index.md} (62%) rename docs/{ml_training.md => tutorials/model_training/index.md} (50%) diff --git a/.github/workflows/checks.yml b/.github/workflows/checks.yml index ead9fa466..f88c21953 100644 --- a/.github/workflows/checks.yml +++ b/.github/workflows/checks.yml @@ -293,7 +293,7 @@ jobs: pip install uv uv venv source .venv/bin/activate - uv pip install "flytekit>=1.12.2" flytekitplugins-deck-standard torch tabulate pyarrow + uv pip install "flytekit>=1.12.2" flytekitplugins-deck-standard torch tabulate pyarrow pandas pip freeze - name: Checkout flytesnacks uses: actions/checkout@v3 diff --git a/.readthedocs.yml b/.readthedocs.yml index b1825c215..796b75d7d 100644 --- a/.readthedocs.yml +++ b/.readthedocs.yml @@ -7,7 +7,7 @@ version: 2 build: os: "ubuntu-22.04" tools: - python: "3.11" + python: "3.12" apt_packages: - pandoc diff --git a/README.md b/README.md index bd2e73154..f6ad212d4 100644 --- a/README.md +++ b/README.md @@ -3,10 +3,10 @@ Flyte Logo

- Flyte User Guide & Tutorials + Flyte tutorials and integrations examples

- Flytesnacks encompasses code examples showcasing Flytekit Python + Flytesnacks encompasses tutorials and integrations examples showcasing Flytekit Python

@@ -30,11 +30,9 @@ > To get the hang of Python SDK, refer to the [Getting Started](https://docs.flyte.org/en/latest/getting_started.html) tutorial before exploring the examples. -The [User Guide](https://docs.flyte.org/projects/cookbook/en/latest/index.html) section has code examples, tips, and tricks that showcase the usage of Flyte features and integrations. +The [Tutorials](https://docs.flyte.org/en/latest/flytesnacks/tutorials/index.html) section has real-world examples, ranging from machine learning training, data processing to feature engineering. -The [Tutorials](https://docs.flyte.org/projects/cookbook/en/latest/tutorials.html) section has real-world examples, ranging from machine learning training, data processing to feature engineering. - -the [Integrations](https://docs.flyte.org/projects/cookbook/en/latest/integrations.html) section demonstrates how to use Flyte with other tools and frameworks. +the [Integrations](https://docs.flyte.org/en/latest/flytesnacks/integrations/index.html) section demonstrates how to use Flyte with other tools and frameworks. > Flytesnacks currently has all examples in Python (Flytekit Python SDK). @@ -44,7 +42,7 @@ the [Integrations](https://docs.flyte.org/projects/cookbook/en/latest/integratio -You can find the detailed contribution guide [here](docs/contribute.md). +You can find the detailed contribution guide [here](https://docs.flyte.org/en/latest/community/contribute_docs.html#contributing-tutorials-and-integrations-examples).

@@ -52,4 +50,4 @@ You can find the detailed contribution guide [here](docs/contribute.md).

-Refer to the [issues](https://docs.flyte.org/en/latest/community/contribute.html##file-an-issue) section in the contribution guide if you'd like to file an issue. +Refer to the [issues](https://docs.flyte.org/en/latest/community/contribute.html#file-an-issue) section in the contribution guide if you'd like to file an issue. diff --git a/dev-requirements.txt b/dev-requirements.txt index 39a1cdb2d..6716a9987 100644 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -1,188 +1,185 @@ # -# This file is autogenerated by pip-compile with Python 3.10 +# This file is autogenerated by pip-compile with Python 3.12 # by the following command: # -# pip-compile ./dev-requirements.in +# pip-compile dev-requirements.in # -adlfs==2023.10.0 +adlfs==2024.7.0 # via flytekit -aiobotocore==2.5.4 +aiobotocore==2.15.1 # via s3fs -aiohttp==3.9.1 +aiohappyeyeballs==2.4.3 + # via aiohttp +aiohttp==3.10.8 # via # adlfs # aiobotocore # gcsfs # s3fs -aioitertools==0.11.0 +aioitertools==0.12.0 # via aiobotocore aiosignal==1.3.1 # via aiohttp -arrow==1.3.0 - # via cookiecutter -async-timeout==4.0.3 - # via aiohttp -attrs==23.1.0 - # via aiohttp -autoflake==2.2.1 +attrs==24.2.0 + # via + # aiohttp + # jsonlines +autoflake==2.3.1 # via -r dev-requirements.in -azure-core==1.29.5 +azure-core==1.31.0 # via # adlfs # azure-identity # azure-storage-blob azure-datalake-store==0.0.53 # via adlfs -azure-identity==1.15.0 +azure-identity==1.18.0 # via adlfs -azure-storage-blob==12.19.0 +azure-storage-blob==12.23.1 # via adlfs -binaryornot==0.4.4 - # via cookiecutter -botocore==1.31.17 +botocore==1.35.23 # via aiobotocore -cachetools==5.3.2 +cachetools==5.5.0 # via google-auth -certifi==2023.11.17 - # via - # kubernetes - # requests -cffi==1.16.0 +certifi==2024.8.30 + # via requests +cffi==1.17.1 # via # azure-datalake-store # cryptography cfgv==3.4.0 # via pre-commit -chardet==5.2.0 - # via binaryornot charset-normalizer==3.3.2 # via requests click==8.1.7 # via - # cookiecutter + # dask # flytekit # 
rich-click cloudpickle==3.0.0 - # via flytekit -codespell==2.2.6 + # via + # dask + # flytekit +codespell==2.3.0 # via -r dev-requirements.in -cookiecutter==2.5.0 - # via flytekit -coverage==7.3.2 +coverage==7.6.1 # via -r dev-requirements.in -croniter==2.0.1 +croniter==3.0.3 # via flytekit -cryptography==41.0.7 +cryptography==43.0.1 # via # azure-identity # azure-storage-blob # msal # pyjwt - # secretstorage +dask[array,dataframe]==2024.9.1 + # via + # -r dev-requirements.in + # dask-expr +dask-expr==1.1.15 + # via dask dataclasses-json==0.5.9 # via flytekit decorator==5.1.1 # via gcsfs diskcache==5.6.3 # via flytekit -distlib==0.3.7 +distlib==0.3.8 # via virtualenv -docker==6.1.3 +docker==7.1.0 # via flytekit -docstring-parser==0.15 +docstring-parser==0.16 # via flytekit -exceptiongroup==1.2.0 - # via pytest -filelock==3.13.1 +filelock==3.16.1 # via virtualenv -flyteidl==1.10.6 +flyteidl==1.13.4 # via flytekit -flytekit==1.10.2 +flytekit==1.13.7 # via -r dev-requirements.in -frozenlist==1.4.0 +frozenlist==1.4.1 # via # aiohttp # aiosignal -fsspec==2023.9.2 +fsspec==2024.9.0 + # via # adlfs + # dask # flytekit # gcsfs # s3fs -gcsfs==2023.9.2 +gcsfs==2024.9.0.post1 # via flytekit -google-api-core==2.15.0 +google-api-core==2.20.0 # via # google-cloud-core # google-cloud-storage -google-auth==2.25.1 +google-auth==2.35.0 # via # gcsfs # google-api-core # google-auth-oauthlib # google-cloud-core # google-cloud-storage - # kubernetes -google-auth-oauthlib==1.1.0 +google-auth-oauthlib==1.2.1 # via gcsfs google-cloud-core==2.4.1 # via google-cloud-storage -google-cloud-storage==2.13.0 +google-cloud-storage==2.18.2 # via gcsfs -google-crc32c==1.5.0 +google-crc32c==1.6.0 # via # google-cloud-storage # google-resumable-media -google-resumable-media==2.6.0 +google-resumable-media==2.7.2 # via google-cloud-storage -googleapis-common-protos==1.62.0 +googleapis-common-protos==1.65.0 # via # flyteidl # flytekit # google-api-core # grpcio-status -grpcio==1.60.0 + # 
protoc-gen-openapiv2 +grpcio==1.66.2 # via # flytekit # grpcio-status -grpcio-status==1.60.0 +grpcio-status==1.66.2 # via flytekit -identify==2.5.33 +identify==2.6.1 # via pre-commit -idna==3.6 +idna==3.10 # via # requests # yarl -importlib-metadata==7.0.0 - # via - # flytekit - # keyring +importlib-metadata==8.5.0 + # via flytekit iniconfig==2.0.0 # via pytest isodate==0.6.1 # via azure-storage-blob -jaraco-classes==3.3.0 +jaraco-classes==3.4.0 + # via keyring +jaraco-context==6.0.1 + # via keyring +jaraco-functools==4.1.0 # via keyring -jeepney==0.8.0 - # via - # keyring - # secretstorage -jinja2==3.1.2 - # via cookiecutter jmespath==1.0.1 # via botocore -joblib==1.3.2 +joblib==1.4.2 # via flytekit -jsonpickle==3.0.2 +jsonlines==4.0.0 # via flytekit -keyring==24.3.0 +jsonpickle==3.3.0 # via flytekit -kubernetes==28.1.0 +keyring==25.4.1 # via flytekit +locket==1.0.0 + # via partd markdown-it-py==3.0.0 - # via rich -markupsafe==2.1.3 - # via jinja2 -marshmallow==3.20.1 + # via + # flytekit + # rich +marshmallow==3.22.0 # via # dataclasses-json # marshmallow-enum @@ -193,7 +190,7 @@ marshmallow-enum==1.5.1 # flytekit marshmallow-jsonschema==0.13.0 # via flytekit -mashumaro==3.11 +mashumaro==3.13.1 # via # -r dev-requirements.in # flytekit @@ -201,157 +198,151 @@ mdurl==0.1.2 # via markdown-it-py mock==5.1.0 # via -r dev-requirements.in -more-itertools==10.1.0 - # via jaraco-classes -msal==1.26.0 +more-itertools==10.5.0 + # via + # jaraco-classes + # jaraco-functools +msal==1.31.0 # via # azure-datalake-store # azure-identity # msal-extensions -msal-extensions==1.0.0 +msal-extensions==1.2.0 # via azure-identity -multidict==6.0.4 +multidict==6.1.0 # via # aiohttp # yarl -mypy==1.7.1 +mypy==1.11.2 # via -r dev-requirements.in mypy-extensions==1.0.0 # via # mypy # typing-inspect -nodeenv==1.8.0 +nodeenv==1.9.1 # via pre-commit -numpy==1.26.2 +numpy==2.1.1 # via - # flytekit + # dask # pandas # pyarrow + # scipy + # xgboost oauthlib==3.2.2 + # via requests-oauthlib 
+packaging==24.1 # via - # kubernetes - # requests-oauthlib -packaging==23.2 - # via - # docker + # dask # marshmallow # pytest -pandas==1.5.3 - # via flytekit -platformdirs==4.1.0 +pandas==2.2.3 # via - # virtualenv -pluggy==1.3.0 + # dask + # dask-expr +partd==1.4.2 + # via dask +platformdirs==4.3.6 + # via virtualenv +pluggy==1.5.0 # via pytest -portalocker==2.8.2 +portalocker==2.10.1 # via msal-extensions -pre-commit==3.5.0 +pre-commit==3.8.0 # via -r dev-requirements.in -protobuf==4.24.4 +proto-plus==1.24.0 + # via google-api-core +protobuf==5.28.2 # via # flyteidl # flytekit # google-api-core # googleapis-common-protos # grpcio-status - # protoc-gen-swagger -protoc-gen-swagger==0.1.0 + # proto-plus + # protoc-gen-openapiv2 +protoc-gen-openapiv2==0.0.1 # via flyteidl -pyarrow==14.0.1 - # via flytekit -pyasn1==0.5.1 +pyarrow==17.0.0 + # via dask-expr +pyasn1==0.6.1 # via # pyasn1-modules # rsa -pyasn1-modules==0.3.0 +pyasn1-modules==0.4.1 # via google-auth -pycparser==2.21 +pycparser==2.22 # via cffi -pygments==2.17.2 - # via rich -pyjwt[crypto]==2.8.0 +pyflakes==3.2.0 + # via autoflake +pygments==2.18.0 + # via + # flytekit + # rich +pyjwt[crypto]==2.9.0 # via # msal # pyjwt -pytest==7.4.3 +pytest==8.3.3 # via -r dev-requirements.in -python-dateutil==2.8.2 +python-dateutil==2.9.0.post0 # via - # arrow # botocore # croniter - # kubernetes # pandas python-json-logger==2.0.7 # via flytekit -python-slugify==8.0.1 - # via cookiecutter pytimeparse==1.1.8 # via flytekit -pytz==2023.3.post1 +pytz==2024.2 # via # croniter # pandas -pyyaml==6.0.1 +pyyaml==6.0.2 # via - # cookiecutter + # dask # flytekit - # kubernetes # pre-commit -requests==2.31.0 +requests==2.32.3 # via # azure-core # azure-datalake-store - # cookiecutter # docker # flytekit # gcsfs # google-api-core # google-cloud-storage - # kubernetes # msal # requests-oauthlib -requests-oauthlib==1.3.1 - # via - # google-auth-oauthlib - # kubernetes -rich==13.7.0 +requests-oauthlib==2.0.0 + # via 
google-auth-oauthlib +rich==13.9.1 # via - # cookiecutter # flytekit # rich-click -rich-click==1.7.2 +rich-click==1.8.3 # via flytekit rsa==4.9 # via google-auth -s3fs==2023.9.2 +s3fs==2024.9.0 # via flytekit -secretstorage==3.3.3 - # via keyring +scipy==1.14.1 + # via xgboost six==1.16.0 # via # azure-core # isodate - # kubernetes # python-dateutil -statsd==3.3.0 +statsd==4.0.1 # via flytekit -text-unidecode==1.3 - # via python-slugify -tomli==2.0.1 +toolz==0.12.1 # via - # autoflake - # black - # flake8-black - # mypy - # pytest -types-python-dateutil==2.8.19.14 - # via arrow -typing-extensions==4.8.0 + # dask + # partd +typing-extensions==4.12.2 # via # azure-core + # azure-identity # azure-storage-blob - # black # flytekit # mashumaro # mypy @@ -359,25 +350,21 @@ typing-extensions==4.8.0 # typing-inspect typing-inspect==0.9.0 # via dataclasses-json -urllib3==1.26.18 +tzdata==2024.2 + # via pandas +urllib3==2.2.3 # via # botocore # docker # flytekit - # kubernetes # requests -virtualenv==20.25.0 +virtualenv==20.26.6 # via pre-commit -websocket-client==1.7.0 - # via - # docker - # kubernetes wrapt==1.16.0 # via aiobotocore -yarl==1.9.4 +xgboost==2.1.1 + # via -r dev-requirements.in +yarl==1.13.1 # via aiohttp -zipp==3.17.0 +zipp==3.20.2 # via importlib-metadata - -# The following packages are considered to be unsafe in a requirements file: -# setuptools diff --git a/docs-requirements.in b/docs-requirements.in index 04c8ceb24..fb2cbba01 100644 --- a/docs-requirements.in +++ b/docs-requirements.in @@ -1,7 +1,7 @@ flytekit -fsspec<2023.10.0 +pydata-sphinx-theme +fsspec mashumaro -git+https://github.com/flyteorg/furo@main pillow psycopg2-binary flytekitplugins-deck-standard @@ -9,7 +9,6 @@ flytekitplugins-kfpytorch flytekitplugins-sqlalchemy jupytext sphinx -sphinx_rtd_theme sphinx-gallery sphinx-prompt sphinx-code-include @@ -27,6 +26,6 @@ grpcio grpcio-status myst-nb ipython!=8.7.0 -sphinx-tags==0.2.1 +sphinx-tags scikit-learn torch diff --git 
a/docs-requirements.txt b/docs-requirements.txt index 517adf4fd..8df2d7a24 100644 --- a/docs-requirements.txt +++ b/docs-requirements.txt @@ -1,88 +1,80 @@ # -# This file is autogenerated by pip-compile with Python 3.10 +# This file is autogenerated by pip-compile with Python 3.12 # by the following command: # -# /Library/Developer/CommandLineTools/usr/bin/make docs-requirements.txt +# pip-compile docs-requirements.in # -adlfs==2023.10.0 +accessible-pygments==0.0.5 + # via pydata-sphinx-theme +adlfs==2024.7.0 # via flytekit -aiobotocore==2.5.4 +aiobotocore==2.15.1 # via s3fs -aiohttp==3.9.1 +aiohappyeyeballs==2.4.3 + # via aiohttp +aiohttp==3.10.8 # via # adlfs # aiobotocore # gcsfs # s3fs -aioitertools==0.11.0 +aioitertools==0.12.0 # via aiobotocore aiosignal==1.3.1 # via aiohttp -alabaster==0.7.15 +alabaster==1.0.0 # via sphinx -annotated-types==0.6.0 - # via pydantic -anyio==4.2.0 - # via azure-core -appnope==0.1.3 +appnope==0.1.4 # via ipykernel -arrow==1.3.0 - # via cookiecutter -astroid==3.0.2 +astroid==3.3.4 # via # -r docs-requirements.in # sphinx-autoapi asttokens==2.4.1 # via stack-data -async-timeout==4.0.3 - # via aiohttp -attrs==23.2.0 +attrs==24.2.0 # via # aiohttp + # jsonlines # jsonschema # jupyter-cache # referencing - # visions -azure-core==1.29.6 +azure-core==1.31.0 # via # adlfs # azure-identity # azure-storage-blob azure-datalake-store==0.0.53 # via adlfs -azure-identity==1.15.0 +azure-identity==1.18.0 # via adlfs -azure-storage-blob==12.19.0 +azure-storage-blob==12.23.1 # via adlfs -babel==2.14.0 - # via sphinx -beautifulsoup4==4.12.2 +babel==2.16.0 # via - # furo + # pydata-sphinx-theme + # sphinx +beautifulsoup4==4.12.3 + # via + # pydata-sphinx-theme # sphinx-code-include -binaryornot==0.4.4 - # via cookiecutter -botocore==1.31.17 +botocore==1.35.23 # via aiobotocore -cachetools==5.3.2 +cachetools==5.5.0 # via google-auth -certifi==2023.11.17 +certifi==2024.8.30 # via # kubernetes # requests -cffi==1.16.0 + # sphinx-prompt +cffi==1.17.1 # 
via # azure-datalake-store # cryptography -cfgv==3.4.0 - # via pre-commit -chardet==5.2.0 - # via binaryornot charset-normalizer==3.3.2 # via requests click==8.1.7 # via - # cookiecutter # flytekit # jupyter-cache # rich-click @@ -90,29 +82,19 @@ cloudpickle==3.0.0 # via # flytekit # flytekitplugins-kfpytorch -comm==0.2.1 - # via - # ipykernel - # ipywidgets -contourpy==1.2.0 - # via matplotlib -cookiecutter==2.5.0 - # via flytekit -croniter==2.0.1 +comm==0.2.2 + # via ipykernel +croniter==3.0.3 # via flytekit -cryptography==41.0.7 +cryptography==43.0.1 # via # azure-identity # azure-storage-blob # msal # pyjwt -cycler==0.12.1 - # via matplotlib -dacite==1.8.1 - # via ydata-profiling dataclasses-json==0.5.9 # via flytekit -debugpy==1.8.0 +debugpy==1.8.6 # via ipykernel decorator==5.1.1 # via @@ -120,54 +102,47 @@ decorator==5.1.1 # ipython diskcache==5.6.3 # via flytekit -distlib==0.3.8 - # via virtualenv -docker==6.1.3 +docker==7.1.0 # via flytekit -docstring-parser==0.15 +docstring-parser==0.16 # via flytekit -docutils==0.17.1 +docutils==0.21.2 # via # myst-parser + # pydata-sphinx-theme # sphinx # sphinx-panels - # sphinx-rtd-theme + # sphinx-prompt # sphinx-tabs -exceptiongroup==1.2.0 - # via - # anyio - # ipython -executing==2.0.1 +durationpy==0.9 + # via kubernetes +executing==2.1.0 # via stack-data -fastjsonschema==2.19.1 +fastjsonschema==2.20.0 # via nbformat -filelock==3.13.1 - # via - # torch - # virtualenv -flyteidl==1.10.6 +filelock==3.16.1 + # via torch +flyteidl==1.13.4 # via # flytekit # flytekitplugins-kfpytorch -flytekit==1.10.2 +flytekit==1.13.7 # via # -r docs-requirements.in # flytekitplugins-deck-standard # flytekitplugins-kfpytorch # flytekitplugins-sqlalchemy -flytekitplugins-deck-standard==1.10.2 +flytekitplugins-deck-standard==1.13.7 # via -r docs-requirements.in -flytekitplugins-kfpytorch==1.10.2 +flytekitplugins-kfpytorch==1.13.7 # via -r docs-requirements.in -flytekitplugins-sqlalchemy==1.10.2 +flytekitplugins-sqlalchemy==1.13.7 # via -r 
docs-requirements.in -fonttools==4.47.0 - # via matplotlib frozenlist==1.4.1 # via # aiohttp # aiosignal -fsspec==2023.9.2 +fsspec==2024.9.0 # via # -r docs-requirements.in # adlfs @@ -175,15 +150,13 @@ fsspec==2023.9.2 # gcsfs # s3fs # torch -furo @ git+https://github.com/flyteorg/furo@main - # via -r docs-requirements.in -gcsfs==2023.9.2 +gcsfs==2024.9.0.post1 # via flytekit -google-api-core==2.15.0 +google-api-core==2.20.0 # via # google-cloud-core # google-cloud-storage -google-auth==2.26.1 +google-auth==2.35.0 # via # gcsfs # google-api-core @@ -191,127 +164,111 @@ google-auth==2.26.1 # google-cloud-core # google-cloud-storage # kubernetes -google-auth-oauthlib==1.2.0 +google-auth-oauthlib==1.2.1 # via gcsfs google-cloud-core==2.4.1 # via google-cloud-storage -google-cloud-storage==2.14.0 +google-cloud-storage==2.18.2 # via gcsfs -google-crc32c==1.5.0 +google-crc32c==1.6.0 # via # google-cloud-storage # google-resumable-media -google-resumable-media==2.7.0 +google-resumable-media==2.7.2 # via google-cloud-storage -googleapis-common-protos==1.62.0 +googleapis-common-protos==1.65.0 # via # flyteidl # flytekit # google-api-core # grpcio-status -grpcio==1.60.0 + # protoc-gen-openapiv2 +grpcio==1.66.2 # via # -r docs-requirements.in # flytekit # grpcio-status -grpcio-status==1.60.0 +grpcio-status==1.66.2 # via # -r docs-requirements.in # flytekit -htmlmin==0.1.12 - # via ydata-profiling -identify==2.5.33 - # via pre-commit -idna==3.6 +idna==3.10 # via - # anyio # requests + # sphinx-prompt # yarl -imagehash==4.3.1 - # via - # visions - # ydata-profiling imagesize==1.4.1 # via sphinx -importlib-metadata==7.0.1 +importlib-metadata==8.5.0 # via # flytekit # jupyter-cache - # keyring # myst-nb -ipykernel==6.28.0 +ipykernel==6.29.5 # via myst-nb -ipython==8.20.0 +ipython==8.28.0 # via # -r docs-requirements.in # ipykernel - # ipywidgets # myst-nb -ipywidgets==8.1.1 - # via flytekitplugins-deck-standard isodate==0.6.1 # via azure-storage-blob -jaraco-classes==3.3.0 
+jaraco-classes==3.4.0 + # via keyring +jaraco-context==6.0.1 + # via keyring +jaraco-functools==4.1.0 # via keyring jedi==0.19.1 # via ipython -jinja2==3.0.3 +jinja2==3.1.4 # via - # cookiecutter # myst-parser # sphinx # sphinx-autoapi - # sphinx-tabs # torch - # ydata-profiling jmespath==1.0.1 # via botocore -joblib==1.3.2 +joblib==1.4.2 # via # flytekit - # phik # scikit-learn -jsonpickle==3.0.2 +jsonlines==4.0.0 + # via flytekit +jsonpickle==3.3.0 # via flytekit -jsonschema==4.20.0 +jsonschema==4.23.0 # via nbformat jsonschema-specifications==2023.12.1 # via jsonschema -jupyter-cache==0.6.1 +jupyter-cache==1.0.0 # via myst-nb -jupyter-client==8.6.0 +jupyter-client==8.6.3 # via # ipykernel # nbclient -jupyter-core==5.7.1 +jupyter-core==5.7.2 # via # ipykernel # jupyter-client # nbclient # nbformat -jupyterlab-widgets==3.0.9 - # via ipywidgets -jupytext==1.16.0 +jupytext==1.16.4 # via -r docs-requirements.in -keyring==24.3.0 +keyring==25.4.1 # via flytekit -kiwisolver==1.4.5 - # via matplotlib -kubernetes==29.0.0 - # via flytekit -llvmlite==0.41.1 - # via numba -markdown==3.5.1 - # via flytekitplugins-deck-standard -markdown-it-py==2.2.0 +kubernetes==31.0.0 + # via flytekitplugins-kfpytorch +markdown-it-py==3.0.0 # via + # flytekit # jupytext # mdit-py-plugins # myst-parser # rich -markupsafe==2.1.3 +markupsafe==2.1.5 # via jinja2 -marshmallow==3.20.1 +marshmallow==3.22.0 # via # dataclasses-json # marshmallow-enum @@ -322,233 +279,170 @@ marshmallow-enum==1.5.1 # flytekit marshmallow-jsonschema==0.13.0 # via flytekit -mashumaro==3.11 +mashumaro==3.13.1 # via # -r docs-requirements.in # flytekit -matplotlib==3.8.2 - # via - # phik - # seaborn - # wordcloud - # ydata-profiling -matplotlib-inline==0.1.6 +matplotlib-inline==0.1.7 # via # ipykernel # ipython -mdit-py-plugins==0.3.5 +mdit-py-plugins==0.4.2 # via # jupytext # myst-parser mdurl==0.1.2 # via markdown-it-py -more-itertools==10.2.0 - # via jaraco-classes +more-itertools==10.5.0 + # via + # jaraco-classes + 
# jaraco-functools mpmath==1.3.0 # via sympy -msal==1.26.0 +msal==1.31.0 # via # azure-datalake-store # azure-identity # msal-extensions -msal-extensions==1.1.0 +msal-extensions==1.2.0 # via azure-identity -multidict==6.0.4 +multidict==6.1.0 # via # aiohttp # yarl -multimethod==1.10 - # via - # visions - # ydata-profiling mypy-extensions==1.0.0 # via typing-inspect -myst-nb==0.17.2 +myst-nb==1.1.2 # via -r docs-requirements.in -myst-parser==0.18.1 +myst-parser==4.0.0 # via myst-nb -nbclient==0.7.4 +nbclient==0.10.0 # via # jupyter-cache # myst-nb -nbformat==5.9.2 +nbformat==5.10.4 # via # jupyter-cache # jupytext # myst-nb # nbclient -nest-asyncio==1.5.8 +nest-asyncio==1.6.0 # via ipykernel -networkx==3.2.1 +networkx==3.3 + # via torch +numpy==2.1.1 # via - # torch - # visions -nodeenv==1.8.0 - # via pre-commit -numba==0.58.1 - # via ydata-profiling -numpy==1.25.2 - # via - # contourpy - # flytekit - # imagehash - # matplotlib - # numba # pandas - # patsy - # phik - # pyarrow - # pywavelets # scikit-learn # scipy - # seaborn - # statsmodels - # visions - # wordcloud - # ydata-profiling oauthlib==3.2.2 # via # kubernetes # requests-oauthlib -packaging==23.2 +packaging==24.1 # via - # docker # ipykernel # jupytext # marshmallow - # matplotlib - # msal-extensions - # plotly + # pydata-sphinx-theme # sphinx - # statsmodels -pandas==1.5.3 - # via - # flytekit - # phik - # seaborn - # statsmodels - # visions - # ydata-profiling -parso==0.8.3 +pandas==2.2.3 + # via flytekitplugins-sqlalchemy +parso==0.8.4 # via jedi -patsy==0.5.6 - # via statsmodels pexpect==4.9.0 # via ipython -phik==0.12.3 - # via ydata-profiling -pillow==10.2.0 +pillow==10.4.0 # via # -r docs-requirements.in - # imagehash - # matplotlib # sphinx-gallery - # visions - # wordcloud -platformdirs==4.1.0 - # via - # jupyter-core - # virtualenv -plotly==5.18.0 - # via flytekitplugins-deck-standard -portalocker==2.8.2 +platformdirs==4.3.6 + # via jupyter-core +portalocker==2.10.1 # via msal-extensions 
-pre-commit==3.6.0 - # via sphinx-tags -prompt-toolkit==3.0.43 +prompt-toolkit==3.0.48 # via ipython -protobuf==4.24.4 +proto-plus==1.24.0 + # via google-api-core +protobuf==5.28.2 # via # flyteidl # flytekit # google-api-core # googleapis-common-protos # grpcio-status - # protoc-gen-swagger -protoc-gen-swagger==0.1.0 + # proto-plus + # protoc-gen-openapiv2 +protoc-gen-openapiv2==0.0.1 # via flyteidl -psutil==5.9.7 +psutil==6.0.0 # via ipykernel psycopg2-binary==2.9.9 # via -r docs-requirements.in ptyprocess==0.7.0 # via pexpect -pure-eval==0.2.2 +pure-eval==0.2.3 # via stack-data -pyarrow==14.0.2 - # via flytekit -pyasn1==0.5.1 +pyasn1==0.6.1 # via # pyasn1-modules # rsa -pyasn1-modules==0.3.0 +pyasn1-modules==0.4.1 # via google-auth -pycparser==2.21 +pycparser==2.22 # via cffi -pydantic==2.5.3 - # via ydata-profiling -pydantic-core==2.14.6 - # via pydantic -pygments==2.17.2 +pydata-sphinx-theme==0.15.4 + # via -r docs-requirements.in +pygments==2.18.0 # via - # furo + # accessible-pygments + # flytekit # ipython + # pydata-sphinx-theme # rich # sphinx # sphinx-prompt # sphinx-tabs -pyjwt[crypto]==2.8.0 +pyjwt[crypto]==2.9.0 # via # msal # pyjwt -pyparsing==3.1.1 - # via matplotlib -python-dateutil==2.8.2 +python-dateutil==2.9.0.post0 # via - # arrow # botocore # croniter # jupyter-client # kubernetes - # matplotlib # pandas python-json-logger==2.0.7 # via flytekit -python-slugify==8.0.1 - # via cookiecutter pytimeparse==1.1.8 # via flytekit -pytz==2023.3.post1 +pytz==2024.2 # via # croniter # pandas -pywavelets==1.5.0 - # via imagehash -pyyaml==6.0.1 +pyyaml==6.0.2 # via - # cookiecutter # flytekit # jupyter-cache # jupytext # kubernetes # myst-nb # myst-parser - # pre-commit # sphinx-autoapi - # ydata-profiling -pyzmq==25.1.2 +pyzmq==26.2.0 # via # ipykernel # jupyter-client -referencing==0.32.1 +referencing==0.35.1 # via # jsonschema # jsonschema-specifications -requests==2.31.0 +requests==2.32.3 # via # azure-core # azure-datalake-store - # cookiecutter # 
docker # flytekit # gcsfs @@ -559,61 +453,48 @@ requests==2.31.0 # requests-oauthlib # sphinx # sphinxcontrib-youtube - # ydata-profiling -requests-oauthlib==1.3.1 +requests-oauthlib==2.0.0 # via # google-auth-oauthlib # kubernetes -rich==13.7.0 +rich==13.9.1 # via - # cookiecutter # flytekit # rich-click -rich-click==1.7.3 +rich-click==1.8.3 # via flytekit -rpds-py==0.16.2 +rpds-py==0.20.0 # via # jsonschema # referencing rsa==4.9 # via google-auth -s3fs==2023.9.2 +s3fs==2024.9.0 # via flytekit -scikit-learn==1.3.2 +scikit-learn==1.5.2 # via -r docs-requirements.in -scipy==1.11.4 - # via - # imagehash - # phik - # scikit-learn - # statsmodels - # ydata-profiling -seaborn==0.12.2 - # via ydata-profiling +scipy==1.14.1 + # via scikit-learn six==1.16.0 # via # asttokens # azure-core # isodate # kubernetes - # patsy # python-dateutil # sphinx-code-include # sphinxext-remoteliteralinclude -sniffio==1.3.0 - # via anyio snowballstemmer==2.2.0 # via sphinx -soupsieve==2.5 +soupsieve==2.6 # via beautifulsoup4 -sphinx==4.5.0 +sphinx==8.0.2 # via # -r docs-requirements.in - # furo # myst-nb # myst-parser + # pydata-sphinx-theme # sphinx-autoapi - # sphinx-basic-ng # sphinx-code-include # sphinx-copybutton # sphinx-fontawesome @@ -621,154 +502,112 @@ sphinx==4.5.0 # sphinx-panels # sphinx-prompt # sphinx-reredirects - # sphinx-rtd-theme # sphinx-tabs # sphinx-tags - # sphinxcontrib-jquery # sphinxcontrib-youtube # sphinxext-remoteliteralinclude -sphinx-autoapi==2.0.1 +sphinx-autoapi==3.3.2 # via -r docs-requirements.in -sphinx-basic-ng==1.0.0b2 - # via furo -sphinx-code-include==1.1.1 +sphinx-code-include==1.4.0 # via -r docs-requirements.in sphinx-copybutton==0.5.2 # via -r docs-requirements.in sphinx-fontawesome==0.0.6 # via -r docs-requirements.in -sphinx-gallery==0.15.0 +sphinx-gallery==0.17.1 # via -r docs-requirements.in -sphinx-panels==0.6.0 +sphinx-panels==0.4.1 # via -r docs-requirements.in -sphinx-prompt==1.5.0 +sphinx-prompt==1.9.0 # via -r docs-requirements.in 
-sphinx-reredirects==0.1.3 +sphinx-reredirects==0.1.5 # via -r docs-requirements.in -sphinx-rtd-theme==1.3.0 +sphinx-tabs==3.4.5 # via -r docs-requirements.in -sphinx-tabs==3.4.0 +sphinx-tags==0.4 # via -r docs-requirements.in -sphinx-tags==0.2.1 - # via -r docs-requirements.in -sphinxcontrib-applehelp==1.0.4 +sphinxcontrib-applehelp==2.0.0 # via sphinx -sphinxcontrib-devhelp==1.0.2 +sphinxcontrib-devhelp==2.0.0 # via sphinx -sphinxcontrib-htmlhelp==2.0.1 +sphinxcontrib-htmlhelp==2.1.0 # via sphinx -sphinxcontrib-jquery==4.1 - # via sphinx-rtd-theme sphinxcontrib-jsmath==1.0.1 # via sphinx sphinxcontrib-mermaid==0.9.2 # via -r docs-requirements.in -sphinxcontrib-qthelp==1.0.3 +sphinxcontrib-qthelp==2.0.0 # via sphinx -sphinxcontrib-serializinghtml==1.1.5 +sphinxcontrib-serializinghtml==2.0.0 # via sphinx -sphinxcontrib-youtube==1.3.0 +sphinxcontrib-youtube==1.4.1 # via -r docs-requirements.in sphinxext-remoteliteralinclude==0.4.0 # via -r docs-requirements.in -sqlalchemy==2.0.25 +sqlalchemy==2.0.35 # via # flytekitplugins-sqlalchemy # jupyter-cache stack-data==0.6.3 # via ipython -statsd==3.3.0 +statsd==4.0.1 # via flytekit -statsmodels==0.14.1 - # via ydata-profiling -sympy==1.12 +sympy==1.13.3 # via torch tabulate==0.9.0 # via jupyter-cache -tangled-up-in-unicode==0.2.0 - # via visions -tenacity==8.2.3 - # via plotly -text-unidecode==1.3 - # via python-slugify -threadpoolctl==3.2.0 +threadpoolctl==3.5.0 # via scikit-learn -toml==0.10.2 - # via jupytext -torch==2.1.2 +torch==2.4.1 # via -r docs-requirements.in -tornado==6.4 +tornado==6.4.1 # via # ipykernel # jupyter-client -tqdm==4.66.1 - # via ydata-profiling -traitlets==5.14.1 +traitlets==5.14.3 # via # comm # ipykernel # ipython - # ipywidgets # jupyter-client # jupyter-core # matplotlib-inline # nbclient # nbformat -typeguard==4.1.5 - # via ydata-profiling -types-python-dateutil==2.8.19.20240106 - # via arrow -typing-extensions==4.9.0 - # via - # anyio - # astroid +typing-extensions==4.12.2 + # via # 
azure-core + # azure-identity # azure-storage-blob # flytekit # mashumaro # myst-nb - # myst-parser - # pydantic - # pydantic-core + # pydata-sphinx-theme # rich-click # sqlalchemy # torch - # typeguard # typing-inspect typing-inspect==0.9.0 # via dataclasses-json -unidecode==1.3.7 - # via sphinx-autoapi -urllib3==1.26.18 +tzdata==2024.2 + # via pandas +urllib3==2.2.3 # via # botocore # docker # flytekit # kubernetes # requests -virtualenv==20.25.0 - # via pre-commit -visions[type-image-path]==0.7.5 - # via - # visions - # ydata-profiling + # sphinx-prompt wcwidth==0.2.13 # via prompt-toolkit -websocket-client==1.7.0 - # via - # docker - # kubernetes -widgetsnbextension==4.0.9 - # via ipywidgets -wordcloud==1.9.3 - # via ydata-profiling +websocket-client==1.8.0 + # via kubernetes wrapt==1.16.0 # via aiobotocore -yarl==1.9.4 +yarl==1.13.1 # via aiohttp -ydata-profiling==4.6.4 - # via flytekitplugins-deck-standard -zipp==3.17.0 +zipp==3.20.2 # via importlib-metadata # The following packages are considered to be unsafe in a requirements file: diff --git a/docs/conf.py b/docs/conf.py index d8ce10fca..9853316bc 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -97,7 +97,12 @@ ] include_patterns = [ + "index.md", + "_tags/tagsindex", + "*.md", + "**/*.md", "auto_examples/**/index.md", + "auto_examples/**/*.md", ] # The master toctree document. @@ -121,7 +126,7 @@ # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. # -html_theme = "furo" +html_theme = "pydata_sphinx_theme" html_title = "Flyte" announcement = """ @@ -129,6 +134,7 @@ Please visit the new documentation here. """ +""" html_theme_options = { "light_css_variables": { "color-brand-primary": "#4300c9", @@ -148,6 +154,7 @@ "docs_path": "docs", # path to documentation source "announcement": announcement, } +""" # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. 
They are copied after the builtin static files, @@ -183,9 +190,6 @@ "extending", "productionizing", "testing", - "bigquery_plugin", - "databricks_plugin", - "snowflake_plugin", ] # intersphinx configuration diff --git a/docs/contribute.md b/docs/contribute.md deleted file mode 100644 index 2bd45bff3..000000000 --- a/docs/contribute.md +++ /dev/null @@ -1,359 +0,0 @@ -# Contributing to User Guide, Tutorials and Integrations - -```{eval-rst} -.. tags:: Contribute, Basic -``` - -The examples documentation provides an easy way for the community to learn about the rich set of -features that Flyte offers, and we are constantly improving them with your help! - -Whether you're a novice or experienced software engineer, data scientist, or machine learning -practitioner, all contributions are welcome! - -## How to contribute - -The Flyte documentation examples guides are broken up into three types: - -1. {ref}`User Guides `: These are short, simple guides that demonstrate how to use a particular Flyte feature. The Markdown files live in the [flyte repository](https://github.com/flyteorg/flyte/tree/master/docs/user_guide), while the example code lives in flytesnacks. - These examples should be runnable locally. **Note:** The comments in the user guide Python files **will not** be rendered as user-facing docs. To update the user-facing documentation, open a pull request in the flyte repo. -2. {ref}`Tutorials `: These are longer, more advanced guides that use multiple Flyte features to solve - real-world problems. Tutorials are generally more complex examples that may require extra setup or that can only run - on larger clusters. -3. {ref}`Integrations `: These examples showcase how to use the Flyte plugins that integrate with the - broader data and ML ecosystem. 
- -The first step to contributing an example is to open up a -[documentation issue](https://github.com/flyteorg/flyte/issues/new?assignees=&labels=documentation%2Cuntriaged&template=docs_issue.yaml&title=%5BDocs%5D+) -to articulate the kind of example you want to write. The Flyte maintainers will guide and help you figure out where your example would fit best. - -## Creating an example - -:::{admonition} Prerequisites -Follow the {ref}`env_setup` guide to get your development environment ready. -::: - -The `flytesnacks` repo examples live in the `examples` directory, where each -subdirectory contains a self-contained example project that covers a particular -feature, integration, or use case. - -```{code-block} bash -examples -├── README.md -├── airflow_plugin -├── athena_plugin -├── aws_batch_plugin -├── basics -├── bigquery_plugin -... -``` - -### Adding an example script to an existing project - -If you're adding a new example to an existing project, you can simply create a -new `.py` file in the appropriate directory. For example, if you want to add a new -example in the `examples/basics` project, simply do: - -```{prompt} bash -touch examples/basics/my_new_example.py -``` - -If you are creating a new user guide example, you can reference the code in the user guide documentation using the `rli` (remoteliteralinclude) directive. - -If you are creating a new integration or tutorial example, add the example to the `README.md` file of the -example project as an entry in the `auto-examples-toc` directive: - -````{code-block} -```{auto-examples-toc} -... -my_new_example -``` -```` - -### Creating a new example project - -````{important} -If you're creating a new example in the User Guide, Tutorials, or Integrations -that doesn't fit into any of the existing subdirectories, you'll need to setup a -new example project. 
- -In the `flytesnacks` root directory, create one with: - -```{prompt} bash -./scripts/create-example-project new_example_project -``` - -This will create a new directory under `examples`: - -```{code-block} bash -examples/new_example_project -├── Dockerfile -├── README.md -├── new_example_project -│   ├── __init__.py -│   └── example.py -├── requirements.in -└── requirements.txt -``` - -```` - -### Creating python examples - -#### User guide examples - -If you are writing a user guide example, write your example Python script in regular Python, with regular comments. These comments **will not** be extracted from the Python file and turned into user-facing documentation. To update user-facing user guide documentation, edit the user guide files in the [flyte repository](https://github.com/flyteorg/flyte/tree/master/docs/user_guide). You can use the `rli` ([remoteliteralinclude](https://github.com/wpilibsuite/sphinxext-remoteliteralinclude/blob/main/README.md)) directive to include snippets of code from your example Python file. - -#### Tutorial or integration examples - -If you are writing a tutorial or integration example, write your example Python script in [percent format](https://jupytext.readthedocs.io/en/latest/formats.html#the-percent-format), -which allows you to interleave python code and markdown in the same file. Each -code cell should be delimited by `# %%`, and each markdown cell should be -delimited with `# %% [markdown]`. - -```{code-block} python -# %% -print("Hello World!") - -# %% [markdown] -# This is a markdown cell - -# %% -print("This is another code cell") -``` - -Markdown cells have access to sphinx directives through the -[myst markdown](https://myst-parser.readthedocs.io/en/latest/) format, -which is a flavor of markdown that makes it easier to write documentation while -giving you the utilities of sphinx. 
`flytesnacks` uses the -[myst-nb](https://myst-nb.readthedocs.io/en/latest/) and -[jupytext](https://github.com/mwouts/jupytext) packages to interpret the -python files as rst-compatible files. - -### Writing examples: explain what the code does - -Following the [literate programming](https://en.wikipedia.org/wiki/Literate_programming) paradigm, make sure to -interleave explanations in the `*.py` files containing the code example. - -:::{admonition} A Simple Example -:class: tip - -Here's a code snippet that defines a function that takes two positional arguments and one keyword argument: - -```python -def function(x, y, z=3): - return x + y * z -``` - -As you can see, `function` adds the two first arguments and multiplies the sum with the third keyword -argument. Can you think of a better name for this `function`? -::: - -Explanations don't have to be this detailed for such a simple example, but you can imagine how this makes for a better -reading experience for more complicated examples. - -### Creating examples in other formats - -Writing examples in `.py` files is preferred since they are easily tested and -packaged, but `flytesnacks` also supports examples written in `.ipynb` and -`.md` files in myst markdown format. This is useful in the following cases: - -- `.ipynb`: When a `.py` example needs a companion jupyter notebook as a task, e.g. - to illustrate the use of {py:class}`~flytekitplugins.papermill.NotebookTask`s, - or when an example is intended to be run from a notebook. -- `.md`: When a piece of documentation doesn't require testable or packaged - flyte tasks/workflows, an example page can be written as a myst markdown file. - -**Note:** If you want to add Markdown files to a user guide example project, add them to the [flyte repository](https://github.com/flyteorg/flyte/tree/master/docs/user_guide) instead. - -## Writing a README - -The `README.md` file needs to capture the _what_, _why_, and _how_ of the example. - -- What is the integration about? 
Its features, etc. -- Why do we need this integration? How is it going to benefit the Flyte users? -- Showcase the uniqueness of the integration -- How to install the plugin? - -Finally, **for tutorials and integrations only**, write a `auto-examples-toc` directive at the bottom of the file: - -````{code-block} -```{auto-examples-toc} -example_01 -example_02 -example_03 -``` -```` - -Where `example_01`, `example_02`, and `example_03` are the python module -names of the examples under the `new_example_project` directory. These can also -be the names of the `.ipynb` or `.md` files (but without the file extension). - -:::{tip} -Refer to any subdirectory in the `examples` directory -::: - -## Test your code - -If the example code can be run locally, just use `python .py` to run it. - -### Testing on a cluster - -Install {doc}`flytectl `, the commandline interface for flyte. - -:::{note} -Learn more about installation and configuration of Flytectl [here](https://docs.flyte.org/en/latest/flytectl/docs_index.html). -::: - -Start a Flyte demo cluster with: - -``` -flytectl demo start -``` - -### Testing the `basics` project examples on a local demo cluster - -In this example, we'll build the `basics` project: - -```{prompt} bash -# from flytesnacks root directory -cd examples/basics -``` - -Build the container: - -```{prompt} bash -docker build . --tag "basics:v1" -f Dockerfile -``` - -Package the examples by running: - -```{prompt} bash -pyflyte --pkgs basics package --image basics:v1 -f -``` - -Register the examples by running - -```{prompt} bash -flytectl register files \ - -p flytesnacks \ - -d development \ - --archive flyte-package.tgz \ - --version v1 -``` - -Visit `https://localhost:30081/console` to view the Flyte console, which consists -of the examples present in the `flytesnacks/core` directory. 
- -### Updating dependencies - -:::{admonition} Prerequisites -Install [pip-tools](https://pypi.org/project/pip-tools/) in your development -environment with: - -```{prompt} bash -pip install pip-tools -``` - -::: - -If you've updated the dependencies of the project, update the `requirements.txt` -file by running: - -```{prompt} bash -pip-compile requirements.in --upgrade --verbose --resolver=backtracking -``` - -### Rebuild the image - -If you've updated the source code or dependencies of the project, and rebuild -the image with: - -```{prompt} bash -docker build . --tag "basics:v2" -f core/Dockerfile -pyflyte --pkgs basics package --image basics:v2 -f -flytectl register files \ - -p flytesnacks \ - -d development \ - --archive flyte-package.tgz \ - --version v2 -``` - -Refer to {ref}`this guide ` -if the code in itself is updated and requirements.txt is the same. - -## Pre-commit hooks - -We use [pre-commit](https://pre-commit.com/) to automate linting and code formatting on every commit. -Configured hooks include [ruff](https://github.com/astral-sh/ruff) to ensure newlines are added to the end of files, and there is proper spacing in files. - -We run all those hooks in CI, but if you want to run them locally on every commit, run `pre-commit install` after -installing the dev environment requirements. In case you want to disable `pre-commit` hooks locally, run -`pre-commit uninstall`. More info [here](https://pre-commit.com/). - -### Formatting - -We use [ruff](https://github.com/astral-sh/ruff) to autoformat code. They -are configured as git hooks in `pre-commit`. Run `make fmt` to format your code. - -### Spell-checking - -We use [codespell](https://github.com/codespell-project/codespell) to catch common misspellings. Run -`make spellcheck` to spell-check the changes. - -## Update Documentation Pages - -The `docs/conf.py` contains the sphinx configuration for building the -`flytesnacks` documentation. 
- -At build-time, the `flytesnacks` sphinx build system will convert all of the -projects in the `examples` directory into `docs/auto_examples`, and will be -available in the documentation. - -::::{important} - -The docs build system will convert the `README.md` files in each example -project into a `index.md` file, so you can reference the root page of each -example project, e.g., in myst markdown format, you can write a table-of-content -directive like so: - -:::{code-block} - -```{toc} -auto_examples/basics/index -``` - -::: - -:::: - -If you've created a new example project, you'll need to add the `index` page -in the table of contents in `docs/index.md` to make sure the project -shows up in the documentation. Additonally, you'll need to update the appropriate -`list-table` directive in `docs/userguide.md`, `docs/tutorials.md`, or -`docs/integrations.md` so that it shows up in the respective section of the -documentation. - -## Build the documentation locally - -Verify that the code and documentation look as expected: - -- Learn about the documentation tools [here](https://docs.flyte.org/en/latest/community/contribute.html#documentation) -- Install the requirements by running `pip install -r docs-requirements.txt`. -- Run `make -C docs html` - - ```{tip} - To run a fresh build, run `make -C docs clean html`. - ``` - -- Open the HTML pages present in the `docs/_build` directory in the browser with - `open docs/_build/index.html` - -## Create a pull request - -Create the pull request, then ensure that the docs are rendered correctly by clicking on the documentation check. - -```{image} https://raw.githubusercontent.com/flyteorg/static-resources/main/common/test_docs_link.png -:alt: Docs link in a PR -``` - -You can refer to [this PR](https://github.com/flyteorg/flytesnacks/pull/332) for the exact changes required. 
diff --git a/docs/index.md b/docs/index.md index 027058e5c..335236f8f 100644 --- a/docs/index.md +++ b/docs/index.md @@ -12,145 +12,14 @@ next-page-title: Quickstart guide (getting_started_index)= -# Introduction to Flyte +# Flytesnacks -Flyte is a workflow orchestrator that unifies machine learning, data engineering, and data analytics stacks for building robust and reliable applications. Flyte features: - -- Reproducible, repeatable workflows -- Strongly typed interfaces -- Structured datasets to enable easy conversion of dataframes between types, and column-level type checking -- Easy movement of data between local and cloud storage -- Easy tracking of data lineages -- Built-in data and artifact visualization - -For a full list of feature, see the [Flyte features page](https://flyte.org/features). - -## Basic Flyte components - -Flyte is made up of a user plane, control plane, and data plane. - -- The **user plane** contains the elements you need to develop the code that will implement your application's directed acyclic graph (DAG). These elements are FlyteKit and Flytectl. Data scientists and machine learning engineers primarily work in the user plane. -- The **control plane** is part of the Flyte backend that is configured by platform engineers or others tasked with setting up computing infrastructure. It consists of FlyteConsole and FlyteAdmin, which serves as the main Flyte API to process requests from clients in the user plane. The control plane sends workflow execution requests to the data plane for execution, and stores information such as current and past running workflows, and provides that information upon request. -- The **data plane** is another part of the Flyte backend that contains FlytePropeller, the core engine of Flyte that executes workflows. FlytePropeller is designed as a [Kubernetes Controller](https://kubernetes.io/docs/concepts/architecture/controller/). 
The data plane sends status events back to the control plane so that information can be stored and surfaced to end users. - -## Next steps - -- To quickly try out Flyte on your machine, follow the {ref}`Quickstart guide `. -- To create a Flyte project that can be used to package workflow code for deployment to a Flyte cluster, see {ref}`"Getting started with workflow development" `. -- To set up a Flyte cluster, see the [Deployment documentation](https://docs.flyte.org/en/latest/deployment/index.html). - -```{toctree} -:maxdepth: 1 -:hidden: - -Getting Started -User Guide -Tutorials -Concepts -Deployment -API Reference -Community -``` - -```{toctree} -:maxdepth: -1 -:caption: Getting Started -:hidden: - -Introduction to Flyte -Quickstart guide -Getting started with workflow development -Flyte fundamentals -Core use cases -``` - -```{toctree} -:maxdepth: -1 -:caption: User Guide -:hidden: - -📖 User Guide -🌳 Environment Setup -🔤 Basics -⌨️ Data Types and IO -🔮 Advanced Composition -🧩 Customizing Dependencies -🏡 Development Lifecycle -⚗️ Testing -🚢 Productionizing -🏗 Extending -📝 Contributing -``` - -```{toctree} -:maxdepth: -1 -:caption: Tutorials -:hidden: - -Tutorials -Model Training -feature_engineering -bioinformatics_examples -flyte_lab -``` - -```{toctree} -:maxdepth: -1 -:caption: Integrations -:hidden: - -Integrations -auto_examples/airflow_agent/index -auto_examples/airflow_plugin/index -auto_examples/athena_plugin/index -auto_examples/aws_batch_plugin/index -auto_examples/bigquery_agent/index -auto_examples/chatgpt_agent/index -auto_examples/k8s_dask_plugin/index -auto_examples/databricks_agent/index -auto_examples/dbt_plugin/index -auto_examples/dolt_plugin/index -auto_examples/duckdb_plugin/index -auto_examples/flyteinteractive_plugin/index -auto_examples/greatexpectations_plugin/index -auto_examples/hive_plugin/index -auto_examples/k8s_pod_plugin/index -auto_examples/mlflow_plugin/index -auto_examples/mmcloud_agent/index -auto_examples/modin_plugin/index 
-auto_examples/kfmpi_plugin/index -auto_examples/nim_plugin/index -auto_examples/ollama_plugin/index -auto_examples/onnx_plugin/index -auto_examples/openai_batch_agent/index -auto_examples/papermill_plugin/index -auto_examples/pandera_plugin/index -auto_examples/kfpytorch_plugin/index -auto_examples/ray_plugin/index -auto_examples/sagemaker_inference_agent/index -auto_examples/sensor/index -auto_examples/snowflake_agent/index -auto_examples/k8s_spark_plugin/index -auto_examples/sql_plugin/index -auto_examples/kftensorflow_plugin/index -auto_examples/whylogs_plugin/index -``` - -```{toctree} -:maxdepth: -1 -:caption: Deprecated integrations -:hidden: - -Deprecated integrations -BigQuery plugin -Databricks plugin -Snowflake plugin -``` +Welcome to Flytesnacks. ```{toctree} :maxdepth: -1 -:caption: Tags :hidden: -_tags/tagsindex +Tutorials +Integrations ``` diff --git a/docs/integrations/deprecated_integrations/index.md b/docs/integrations/deprecated_integrations/index.md new file mode 100644 index 000000000..ff5c12e2a --- /dev/null +++ b/docs/integrations/deprecated_integrations/index.md @@ -0,0 +1,10 @@ +# Deprecated integrations + +```{toctree} +:maxdepth: -1 +:hidden: + +BigQuery plugin
+Databricks plugin
+Snowflake plugin +``` diff --git a/docs/integrations.md b/docs/integrations/index.md similarity index 63% rename from docs/integrations.md rename to docs/integrations/index.md index b763017ad..b1cd315ac 100644 --- a/docs/integrations.md +++ b/docs/integrations/index.md @@ -5,7 +5,7 @@ Flyte is designed to be highly extensible and can be customized in multiple ways. ```{note} -Want to contribute an example? Check out the {doc}`Example Contribution Guide `. +Want to contribute an example? Check out the {ref}`Documentation contribution guide `. ``` ## Flytekit Plugins @@ -18,33 +18,33 @@ Flytekit functionality. These plugins can be anything and for comparison can be :header-rows: 0 :widths: 20 30 -* - {doc}`SQL ` +* - {doc}`SQL ` - Execute SQL queries as tasks. -* - {doc}`Great Expectations ` +* - {doc}`Great Expectations ` - Validate data with `great_expectations`. -* - {doc}`Papermill ` +* - {doc}`Papermill ` - Execute Jupyter Notebooks with `papermill`. -* - {doc}`Pandera ` +* - {doc}`Pandera ` - Validate pandas dataframes with `pandera`. -* - {doc}`Modin ` +* - {doc}`Modin ` - Scale pandas workflows with `modin`. -* - {doc}`Dolt ` +* - {doc}`Dolt ` - Version your SQL database with `dolt`. -* - {doc}`DBT ` +* - {doc}`DBT ` - Run and test your `dbt` pipelines in Flyte. -* - {doc}`WhyLogs ` +* - {doc}`WhyLogs ` - `whylogs`: the open standard for data logging. -* - {doc}`MLFlow ` +* - {doc}`MLFlow ` - `mlflow`: the open standard for model tracking. -* - {doc}`ONNX ` +* - {doc}`ONNX ` - Convert ML models to ONNX models seamlessly. -* - {doc}`DuckDB ` +* - {doc}`DuckDB ` - Run analytical queries using DuckDB. -* - {doc}`Weights and Biases ` +* - {doc}`Weights and Biases ` - `wandb`: Machine learning platform to build better models faster. -* - {doc}`NIM ` +* - {doc}`NIM ` - Serve optimized model containers with NIM. -* - {doc}`Ollama ` +* - {doc}`Ollama ` - Serve fine-tuned LLMs with Ollama in a Flyte workflow. 
``` @@ -70,19 +70,19 @@ orchestrated by Flyte itself, within its provisioned Kubernetes clusters. :header-rows: 0 :widths: 20 30 -* - {doc}`K8s Pods ` +* - {doc}`K8s Pods ` - Execute K8s pods for arbitrary workloads. -* - {doc}`K8s Cluster Dask Jobs ` +* - {doc}`K8s Cluster Dask Jobs ` - Run Dask jobs on a K8s Cluster. -* - {doc}`K8s Cluster Spark Jobs ` +* - {doc}`K8s Cluster Spark Jobs ` - Run Spark jobs on a K8s Cluster. -* - {doc}`Kubeflow PyTorch ` +* - {doc}`Kubeflow PyTorch ` - Run distributed PyTorch training jobs using `Kubeflow`. -* - {doc}`Kubeflow TensorFlow ` +* - {doc}`Kubeflow TensorFlow ` - Run distributed TensorFlow training jobs using `Kubeflow`. -* - {doc}`MPI Operator ` +* - {doc}`MPI Operator ` - Run distributed deep learning training jobs using Horovod and MPI. -* - {doc}`Ray Task ` +* - {doc}`Ray Task ` - Run Ray jobs on a K8s Cluster. ``` @@ -96,23 +96,23 @@ orchestrated by Flyte itself, within its provisioned Kubernetes clusters. :header-rows: 0 :widths: 20 30 -* - {doc}`Airflow agent ` +* - {doc}`Airflow agent ` - Run Airflow jobs in your workflows with the Airflow agent. -* - {doc}`BigQuery agent ` +* - {doc}`BigQuery agent ` - Run BigQuery jobs in your workflows with the BigQuery agent. -* - {doc}`ChatGPT agent ` +* - {doc}`ChatGPT agent ` - Run ChatGPT jobs in your workflows with the ChatGPT agent. -* - {doc}`Databricks ` +* - {doc}`Databricks ` - Run Databricks jobs in your workflows with the Databricks agent. -* - {doc}`Memory Machine Cloud ` +* - {doc}`Memory Machine Cloud ` - Execute tasks using the MemVerge Memory Machine Cloud agent. -* - {doc}`OpenAI Batch ` +* - {doc}`OpenAI Batch ` - Submit requests for asynchronous batch processing on OpenAI. -* - {doc}`SageMaker Inference ` +* - {doc}`SageMaker Inference ` - Deploy models and create, as well as trigger inference endpoints on SageMaker. -* - {doc}`Sensor ` +* - {doc}`Sensor ` - Run sensor jobs in your workflows with the sensor agent. 
-* - {doc}`Snowflake ` +* - {doc}`Snowflake ` - Run Snowflake jobs in your workflows with the Snowflake agent. ``` @@ -127,13 +127,13 @@ As the term suggests, external service backend plugins rely on external services :header-rows: 0 :widths: 20 30 -* - {doc}`AWS Athena plugin ` +* - {doc}`AWS Athena plugin ` - Execute queries using AWS Athena -* - {doc}`AWS Batch plugin ` +* - {doc}`AWS Batch plugin ` - Running tasks and workflows on AWS batch service -* - {doc}`Flyte Interactive ` +* - {doc}`Flyte Interactive ` - Execute tasks using Flyte Interactive to debug. -* - {doc}`Hive plugin ` +* - {doc}`Hive plugin ` - Run Hive jobs in your workflows. ``` @@ -187,6 +187,48 @@ constructs natively within other orchestration tools. :header-rows: 0 :widths: 20 30 -* - {doc}`Airflow ` +* - {doc}`Airflow ` - Trigger Flyte executions from Airflow. ``` + +```{toctree} +:maxdepth: -1 +:hidden: + +/auto_examples/airflow_agent/index +/auto_examples/airflow_plugin/index +/auto_examples/athena_plugin/index +/auto_examples/aws_batch_plugin/index +/auto_examples/bigquery_agent/index +/auto_examples/chatgpt_agent/index +/auto_examples/k8s_dask_plugin/index +/auto_examples/databricks_agent/index +/auto_examples/dbt_plugin/index +/auto_examples/dolt_plugin/index +/auto_examples/duckdb_plugin/index +/auto_examples/flyteinteractive_plugin/index +/auto_examples/greatexpectations_plugin/index +/auto_examples/hive_plugin/index +/auto_examples/k8s_pod_plugin/index +/auto_examples/mlflow_plugin/index +/auto_examples/mmcloud_agent/index +/auto_examples/modin_plugin/index +/auto_examples/kfmpi_plugin/index +/auto_examples/nim_plugin/index +/auto_examples/ollama_plugin/index +/auto_examples/onnx_plugin/index +/auto_examples/openai_batch_agent/index +/auto_examples/papermill_plugin/index +/auto_examples/pandera_plugin/index +/auto_examples/kfpytorch_plugin/index +/auto_examples/ray_plugin/index +/auto_examples/sagemaker_inference_agent/index +/auto_examples/sensor/index 
+/auto_examples/snowflake_agent/index +/auto_examples/k8s_spark_plugin/index +/auto_examples/sql_plugin/index +/auto_examples/kftensorflow_plugin/index +/auto_examples/wandb_plugin/index +/auto_examples/whylogs_plugin/index +Deprecated integrations +``` diff --git a/docs/bioinformatics_examples.md b/docs/tutorials/bioinformatics/index.md similarity index 81% rename from docs/bioinformatics_examples.md rename to docs/tutorials/bioinformatics/index.md index 0e3bd5508..9207682c4 100644 --- a/docs/bioinformatics_examples.md +++ b/docs/tutorials/bioinformatics/index.md @@ -10,7 +10,7 @@ Flyte very much supports running your bioinformatics applications. Dive deeper i :header-rows: 0 :widths: 20 30 -* - {doc}`Nucleotide Sequence Querying with BLASTX ` +* - {doc}`Nucleotide Sequence Querying with BLASTX ` - Use BLASTX to Query a Nucleotide Sequence Against a Local Protein Database ``` @@ -19,5 +19,5 @@ Flyte very much supports running your bioinformatics applications. Dive deeper i :caption: Contents :hidden: -auto_examples/blast/index +/auto_examples/blast/index ``` diff --git a/docs/feature_engineering.md b/docs/tutorials/feature_engineering/index.md similarity index 62% rename from docs/feature_engineering.md rename to docs/tutorials/feature_engineering/index.md index b34a73891..99ac9f39b 100644 --- a/docs/feature_engineering.md +++ b/docs/tutorials/feature_engineering/index.md @@ -1,4 +1,4 @@ -# Feature Engineering +# Feature engineering **Feature Engineering** is an essential part of Machine Learning. Itss the process of transforming raw data into features that better represent the underlying problem @@ -10,9 +10,9 @@ Explore how features can be engineered with the power of Flyte. 
:header-rows: 0 :widths: 20 30 -* - {doc}`EDA and Feature Engineering With Papermill ` +* - {doc}`EDA and Feature Engineering With Papermill ` - How to use Jupyter notebook within Flyte -* - {doc}`Data Cleaning and Feature Serving With Feast ` +* - {doc}`Data Cleaning and Feature Serving With Feast ` - How to use Feast to serve data in Flyte ``` @@ -22,6 +22,6 @@ Explore how features can be engineered with the power of Flyte. :caption: Contents :hidden: -auto_examples/exploratory_data_analysis/index -auto_examples/feast_integration/index +/auto_examples/exploratory_data_analysis/index +/auto_examples/feast_integration/index ``` diff --git a/docs/flyte_lab.md b/docs/tutorials/flytelab/index.md similarity index 100% rename from docs/flyte_lab.md rename to docs/tutorials/flytelab/index.md diff --git a/docs/weather_forecasting.md b/docs/tutorials/flytelab/weather_forecasting.md similarity index 100% rename from docs/weather_forecasting.md rename to docs/tutorials/flytelab/weather_forecasting.md diff --git a/docs/tutorials.md b/docs/tutorials/index.md similarity index 62% rename from docs/tutorials.md rename to docs/tutorials/index.md index 559a71554..38aaa5b91 100644 --- a/docs/tutorials.md +++ b/docs/tutorials/index.md @@ -1,8 +1,3 @@ ---- -next-page: ml_training -next-page-title: Model Training ---- - (tutorials)= # Tutorials @@ -17,7 +12,7 @@ contributing samples easy. If this is your first time running these examples, fo {ref}`setup guide ` to get started. ```{note} -Want to contribute an example? Check out the {doc}`Example Contribution Guide `. +Want to contribute an example? Check out the [Documentation contribution guide](https://docs.flyte.org/en/latest/flytesnacks/contribute.html). ``` ## 🤖 Model Training @@ -28,15 +23,15 @@ Train machine learning models from using your framework of choice. :header-rows: 0 :widths: 20 30 -* - {doc}`Diabetes Classification ` +* - {doc}`Diabetes Classification ` - Train an XGBoost model on the Pima Indians Diabetes Dataset. 
-* - {doc}`House Price Regression ` +* - {doc}`House Price Regression ` - Use dynamic workflows to train a multiregion house price prediction model using XGBoost. -* - {doc}`MNIST Classification ` +* - {doc}`MNIST Classification ` - Train a neural network on MNIST with PyTorch and W&B -* - {doc}`NLP Processing with Gensim ` +* - {doc}`NLP Processing with Gensim ` - Word embedding and topic modelling on lee background corpus with Gensim -* - {doc}`Sales Forecasting ` +* - {doc}`Sales Forecasting ` - Use the Rossmann Store data to forecast sales with distributed training using Horovod on Spark. ``` @@ -48,9 +43,9 @@ Engineer the data features to improve your model accuracy. :header-rows: 0 :widths: 20 30 -* - {doc}`EDA and Feature Engineering With Papermill ` +* - {doc}`EDA and Feature Engineering With Papermill ` - How to use Jupyter notebook within Flyte -* - {doc}`Data Cleaning and Feature Serving With Feast ` +* - {doc}`Data Cleaning and Feature Serving With Feast ` - How to use Feast to serve data in Flyte ``` @@ -62,7 +57,7 @@ Perform computational biology with Flyte. :header-rows: 0 :widths: 20 30 -* - {doc}`Nucleotide Sequence Querying with BLASTX ` +* - {doc}`Nucleotide Sequence Querying with BLASTX ` - Use BLASTX to Query a Nucleotide Sequence Against a Local Protein Database ``` @@ -74,6 +69,16 @@ The open-source repository of machine learning projects using Flyte. :header-rows: 0 :widths: 20 30 -* - {doc}`Weather Forecasting ` +* - {doc}`Weather Forecasting ` - Build an online weather forecasting application. 
``` + +```{toctree} +:maxdepth: -1 +:hidden: + +Model Training +Feature engineering +Bioinformatics +Flytelab +``` diff --git a/docs/ml_training.md b/docs/tutorials/model_training/index.md similarity index 50% rename from docs/ml_training.md rename to docs/tutorials/model_training/index.md index cd909b2ec..f46ad18c3 100644 --- a/docs/ml_training.md +++ b/docs/tutorials/model_training/index.md @@ -1,4 +1,4 @@ -# Model Training +# Model training Understand how machine learning models can be trained from within Flyte, with an added advantage of orchestration benefits. @@ -6,15 +6,15 @@ Understand how machine learning models can be trained from within Flyte, with an :header-rows: 0 :widths: 20 30 -* - {doc}`Diabetes Classification ` +* - {doc}`Diabetes Classification ` - Train an XGBoost model on the Pima Indians Diabetes Dataset. -* - {doc}`House Price Regression ` +* - {doc}`House Price Regression ` - Use dynamic workflows to train a multiregion house price prediction model using XGBoost. -* - {doc}`MNIST Classification ` +* - {doc}`MNIST Classification ` - Train a neural network on MNIST with PyTorch and W&B -* - {doc}`NLP Processing with Gensim ` +* - {doc}`NLP Processing with Gensim ` - Word embedding and topic modelling on lee background corpus with Gensim -* - {doc}`Forecast Sales Using Rossmann Store Sales ` +* - {doc}`Forecast Sales Using Rossmann Store Sales ` - Forecast sales data with data-parallel distributed training using Horovod on Spark. 
``` @@ -23,9 +23,9 @@ Understand how machine learning models can be trained from within Flyte, with an :caption: Contents :hidden: -auto_examples/pima_diabetes/index -auto_examples/house_price_prediction/index -auto_examples/mnist_classifier/index -auto_examples/nlp_processing/index -auto_examples/forecasting_sales/index +/auto_examples/pima_diabetes/index +/auto_examples/house_price_prediction/index +/auto_examples/mnist_classifier/index +/auto_examples/nlp_processing/index +/auto_examples/forecasting_sales/index ``` diff --git a/examples/airflow_plugin/airflow_plugin/airflow.py b/examples/airflow_plugin/airflow_plugin/airflow.py index 35a296261..d47c75277 100644 --- a/examples/airflow_plugin/airflow_plugin/airflow.py +++ b/examples/airflow_plugin/airflow_plugin/airflow.py @@ -66,7 +66,7 @@ # Click `Save` in the end. # # :::{note} -# Use external IP as the Flyte `Host`. You can {std:ref}`deploy ` Flyte on an on-prem machine or on cloud. +# Use external IP as the Flyte `Host`. You can {ref}`deploy ` Flyte on an on-prem machine or on cloud. # ::: # # ## Register Flyte Code diff --git a/examples/bigquery_agent/requirements.in b/examples/bigquery_agent/requirements.in index a987746f1..e496aea34 100644 --- a/examples/bigquery_agent/requirements.in +++ b/examples/bigquery_agent/requirements.in @@ -1,5 +1,6 @@ flytekit wheel matplotlib +pandas flytekitplugins-deck-standard flytekitplugins-bigquery diff --git a/examples/bigquery_plugin/README.md b/examples/bigquery_plugin/README.md index 5027330e5..46e379751 100644 --- a/examples/bigquery_plugin/README.md +++ b/examples/bigquery_plugin/README.md @@ -1,5 +1,14 @@ -# BigQuery plugin example +# BigQuery plugin -**Note:** This example code uses the legacy implementation of the BigQuery integration. We recommend using the [BigQuery agent](https://docs.flyte.org/en/latest/flytesnacks/examples/bigquery_agent/index.html) instead. +```{warning} +This example code uses the legacy implementation of the BigQuery integration. 
We recommend using the [BigQuery agent](https://docs.flyte.org/en/latest/flytesnacks/examples/bigquery_agent/index.html) instead. +``` This directory contains example code for the deprecated BigQuery plugin. For documentation on installing and using the plugin, see the [BigQuery plugin documentation](https://docs.flyte.org/en/latest/deprecated_integrations/bigquery_plugin/index.html) + +```{toctree} +:maxdepth: -1 +:hidden: + +bigquery_plugin_example +``` diff --git a/examples/bigquery_plugin/bigquery_plugin/bigquery_plugin_example.py b/examples/bigquery_plugin/bigquery_plugin/bigquery_plugin_example.py index 000882653..3aa740edc 100644 --- a/examples/bigquery_plugin/bigquery_plugin/bigquery_plugin_example.py +++ b/examples/bigquery_plugin/bigquery_plugin/bigquery_plugin_example.py @@ -1,3 +1,9 @@ +# %% [markdown] +# (bigquery_plugin_example)= +# # BigQuery plugin example +# +# %% + try: from typing import Annotated except ImportError: diff --git a/examples/bigquery_plugin/requirements.in b/examples/bigquery_plugin/requirements.in index a987746f1..e496aea34 100644 --- a/examples/bigquery_plugin/requirements.in +++ b/examples/bigquery_plugin/requirements.in @@ -1,5 +1,6 @@ flytekit wheel matplotlib +pandas flytekitplugins-deck-standard flytekitplugins-bigquery diff --git a/examples/blast/requirements.in b/examples/blast/requirements.in index bec48ef8c..30f834337 100644 --- a/examples/blast/requirements.in +++ b/examples/blast/requirements.in @@ -1,4 +1,5 @@ flytekit>=0.32.3 wheel matplotlib +pandas flytekitplugins-deck-standard diff --git a/examples/databricks_plugin/README.md b/examples/databricks_plugin/README.md index ca7b3a2b6..636eb7f94 100644 --- a/examples/databricks_plugin/README.md +++ b/examples/databricks_plugin/README.md @@ -1,5 +1,14 @@ -# Databricks plugin example +# Databricks plugin -**Note:** This example code uses a legacy implementation of the Databricks integration. 
We recommend using the [Databricks agent](https://docs.flyte.org/en/latest/flytesnacks/examples/databricks_agent/index.html) instead. +```{warning} +This example code uses a legacy implementation of the Databricks integration. We recommend using the [Databricks agent](https://docs.flyte.org/en/latest/flytesnacks/examples/databricks_agent/index.html) instead. +``` This directory contains example code for the deprecated Databricks plugin. For documentation on installing and using the plugin, see the [Databricks plugin documentation](https://docs.flyte.org/en/latest/deprecated_integrations/databricks_plugin/index.html) + +```{toctree} +:maxdepth: -1 +:hidden: + +databricks_plugin_example +``` diff --git a/examples/databricks_plugin/databricks_plugin/databricks_plugin_example.py b/examples/databricks_plugin/databricks_plugin/databricks_plugin_example.py index 3137ca1b5..72f98c5ad 100644 --- a/examples/databricks_plugin/databricks_plugin/databricks_plugin_example.py +++ b/examples/databricks_plugin/databricks_plugin/databricks_plugin_example.py @@ -1,3 +1,9 @@ +# %% [markdown] +# (spark_on_databricks_plugin)= +# # Databricks plugin example +# +# %% + import datetime import random from operator import add diff --git a/examples/development_lifecycle/requirements.in b/examples/development_lifecycle/requirements.in index 8e50db9c6..89af2129e 100644 --- a/examples/development_lifecycle/requirements.in +++ b/examples/development_lifecycle/requirements.in @@ -3,4 +3,5 @@ flytekitplugins-deck-standard plotly scikit-learn tabulate +pandas pyarrow diff --git a/examples/house_price_prediction/README.md b/examples/house_price_prediction/README.md index 49f5c55c5..70341e6c2 100644 --- a/examples/house_price_prediction/README.md +++ b/examples/house_price_prediction/README.md @@ -12,7 +12,7 @@ In this example, we will train our data on the XGBoost model to predict house pr ## Where Does Flyte Fit In? - Orchestrates the machine learning pipeline. 
-- Helps cache the output state between {py:func}`tasks `. +- Helps cache the output state between {py:func}`tasks `. - Easier backtracking to the error source. - Provides a Rich UI to view and manage the pipeline. diff --git a/examples/house_price_prediction/requirements.in b/examples/house_price_prediction/requirements.in index b571e33d3..f09c8717b 100644 --- a/examples/house_price_prediction/requirements.in +++ b/examples/house_price_prediction/requirements.in @@ -7,3 +7,4 @@ joblib scikit-learn tabulate matplotlib +pandas diff --git a/examples/k8s_dask_plugin/README.md b/examples/k8s_dask_plugin/README.md index 713e810c2..c5f6a02ed 100644 --- a/examples/k8s_dask_plugin/README.md +++ b/examples/k8s_dask_plugin/README.md @@ -71,7 +71,7 @@ Flyte Dask utilizes the [Dask Kubernetes operator](https://kubernetes.dask.org/e in conjunction with a custom-built [Flyte Dask plugin](https://pkg.go.dev/github.com/flyteorg/flyteplugins@v1.0.28/go/tasks/plugins/k8s/dask). To leverage this functionality, you need to enable the backend plugin in your deployment. -You can follow the steps mentioned in the {ref}`flyte:deployment-plugin-setup-k8s` section +You can follow the steps mentioned in the {ref}`deployment-plugin-setup-k8s` section to enable the Flyte Dask plugin for your deployment. #### Step 2: Compute setup diff --git a/examples/k8s_pod_plugin/README.md b/examples/k8s_pod_plugin/README.md index e7b0e8dc8..4a1646be5 100644 --- a/examples/k8s_pod_plugin/README.md +++ b/examples/k8s_pod_plugin/README.md @@ -4,7 +4,7 @@ .. tags:: Integration, Kubernetes, Advanced ``` -Flyte tasks, represented by the {py:func}`@task ` decorator, are essentially single functions that run in one container. +Flyte tasks, represented by the {py:func}`@task ` decorator, are essentially single functions that run in one container. 
However, there may be situations where you need to run a job with more than one container or require additional capabilities, such as: - Running a hyper-parameter optimizer that stores state in a Redis database diff --git a/examples/k8s_spark_plugin/README.md b/examples/k8s_spark_plugin/README.md index 3bdbf721c..ad7a39cbe 100644 --- a/examples/k8s_spark_plugin/README.md +++ b/examples/k8s_spark_plugin/README.md @@ -27,7 +27,7 @@ However, please bear in mind that this functionality requires a significant Kube For optimal results, we highly recommend adopting the [multi-cluster mode](https://docs.flyte.org/en/latest/deployment/configuration/performance.html#multi-cluster-mode). -Additionally, consider enabling {std:ref}`resource quotas ` +Additionally, consider enabling {ref}`resource quotas ` for Spark Jobs that are both large in scale and executed frequently. Nonetheless, it is important to note that extremely short-duration jobs might not be the best fit for this setup. @@ -63,7 +63,7 @@ Flyte Spark employs the Spark on K8s operator in conjunction with a bespoke [Flyte Spark Plugin](https://pkg.go.dev/github.com/flyteorg/flyteplugins@v0.5.25/go/tasks/plugins/k8s/spark). This plugin serves as a backend component and necessitates activation within your deployment. -To enable it, follow the instructions outlined in the {ref}`flyte:deployment-plugin-setup-k8s` section. +To enable it, follow the instructions outlined in the {ref}`deployment-plugin-setup-k8s` section. :::{note} Refer to [this guide](https://github.com/GoogleCloudPlatform/spark-on-k8s-operator/blob/master/docs/gcp.md) to use GCP instead of AWS. 
diff --git a/examples/kfpytorch_plugin/README.md b/examples/kfpytorch_plugin/README.md index 95c67245e..e4f631780 100644 --- a/examples/kfpytorch_plugin/README.md +++ b/examples/kfpytorch_plugin/README.md @@ -17,7 +17,7 @@ To use the PyTorch plugin, run the following command: pip install flytekitplugins-kfpytorch ``` -To enable the plugin in the backend, follow instructions outlined in the {std:ref}`flyte:deployment-plugin-setup-k8s` guide. +To enable the plugin in the backend, follow instructions outlined in the {ref}`deployment-plugin-setup-k8s` guide. ## Run the example on the Flyte cluster diff --git a/examples/kftensorflow_plugin/README.md b/examples/kftensorflow_plugin/README.md index 636d0514d..d97b4b477 100644 --- a/examples/kftensorflow_plugin/README.md +++ b/examples/kftensorflow_plugin/README.md @@ -17,7 +17,7 @@ To install the Kubeflow TensorFlow plugin, run the following command: pip install flytekitplugins-kftensorflow ``` -To enable the plugin in the backend, follow instructions outlined in the {std:ref}`flyte:deployment-plugin-setup-k8s` guide. +To enable the plugin in the backend, follow instructions outlined in the {ref}`deployment-plugin-setup-k8s` guide. 
## Run the example on the Flyte cluster diff --git a/examples/mmcloud_agent/mmcloud_agent/mmcloud_agent_example_usage.py b/examples/mmcloud_agent/mmcloud_agent/mmcloud_agent_example_usage.py index d045bbb80..eac12209d 100644 --- a/examples/mmcloud_agent/mmcloud_agent/mmcloud_agent_example_usage.py +++ b/examples/mmcloud_agent/mmcloud_agent/mmcloud_agent_example_usage.py @@ -24,7 +24,7 @@ def to_int(s: str) -> int: # %% [markdown] -# [Resource](https://docs.flyte.org/projects/cookbook/en/latest/auto_examples/productionizing/customizing_resources.html) (cpu and mem) requests and limits, [container](https://docs.flyte.org/projects/cookbook/en/latest/auto_examples/customizing_dependencies/multi_images.html) images, and [environment](https://docs.flyte.org/projects/flytekit/en/latest/generated/flytekit.task.html) variable specifications are supported. +# [Resource](https://docs.flyte.org/en/latest/user_guide/productionizing/customizing_task_resources.html) (cpu and mem) requests and limits, [container](https://docs.flyte.org/en/latest/user_guide/customizing_dependencies/index.html#customizing-dependencies) images, and [environment](https://docs.flyte.org/projects/flytekit/en/latest/generated/flytekit.task.html) variable specifications are supported. # %% diff --git a/examples/pima_diabetes/requirements.in b/examples/pima_diabetes/requirements.in index b571e33d3..f09c8717b 100644 --- a/examples/pima_diabetes/requirements.in +++ b/examples/pima_diabetes/requirements.in @@ -7,3 +7,4 @@ joblib scikit-learn tabulate matplotlib +pandas diff --git a/examples/ray_plugin/README.md b/examples/ray_plugin/README.md index 3ad2a65a8..575c97b50 100644 --- a/examples/ray_plugin/README.md +++ b/examples/ray_plugin/README.md @@ -25,7 +25,7 @@ To install the Ray plugin, run the following command: pip install flytekitplugins-ray ``` -To enable the plugin in the backend, refer to the instructions provided in the {std:ref}`flyte:deployment-plugin-setup-k8s` guide. 
+To enable the plugin in the backend, refer to the instructions provided in the {ref}`deployment-plugin-setup-k8s` guide. ## Implementation details diff --git a/examples/snowflake_plugin/README.md b/examples/snowflake_plugin/README.md index 6921e44ae..43a161c5c 100644 --- a/examples/snowflake_plugin/README.md +++ b/examples/snowflake_plugin/README.md @@ -1,3 +1,14 @@ -**Note:** This example code uses a legacy implementation of the Snowflake integration. We recommend using the [Snowflake agent](https://docs.flyte.org/en/latest/flytesnacks/examples/databricks_agent/index.html) instead. +# Snowflake plugin + +```{warning} +This example code uses a legacy implementation of the Snowflake integration. We recommend using the [Snowflake agent](https://docs.flyte.org/en/latest/flytesnacks/examples/snowflake_agent/index.html) instead. +``` This directory contains example code for the deprecated Snowflake plugin. For documentation on installing and using the plugin, see the [Snowflake plugin documentation](https://docs.flyte.org/en/latest/deprecated_integrations/snowflake_plugin/index.html) + +```{toctree} +:maxdepth: -1 +:hidden: + +snowflake_plugin_example +``` diff --git a/examples/snowflake_plugin/snowflake_plugin.md b/examples/snowflake_plugin/snowflake_plugin.md index 34f622387..e4313fb11 100644 --- a/examples/snowflake_plugin/snowflake_plugin.md +++ b/examples/snowflake_plugin/snowflake_plugin.md @@ -20,7 +20,7 @@ pip install flytekitplugins-snowflake If you intend to run the plugin on the Flyte cluster, you must first set it up on the backend. Please refer to the -{std:ref}`Snowflake plugin setup guide ` +{ref}`Snowflake plugin setup guide ` for detailed instructions.
## Run the example on the Flyte cluster diff --git a/examples/snowflake_plugin/snowflake_plugin/snowflake_plugin_example.py b/examples/snowflake_plugin/snowflake_plugin/snowflake_plugin_example.py index 7ee3b2858..a9903a900 100644 --- a/examples/snowflake_plugin/snowflake_plugin/snowflake_plugin_example.py +++ b/examples/snowflake_plugin/snowflake_plugin/snowflake_plugin_example.py @@ -1,3 +1,8 @@ +# %% [markdown] +# # Snowflake plugin example +# +# %% + from flytekit import kwtypes, workflow from flytekitplugins.snowflake import SnowflakeConfig, SnowflakeTask diff --git a/examples/whylogs_plugin/requirements.in b/examples/whylogs_plugin/requirements.in index 3f8bc07aa..3ecac63ec 100644 --- a/examples/whylogs_plugin/requirements.in +++ b/examples/whylogs_plugin/requirements.in @@ -7,3 +7,4 @@ scikit-learn whylogs[s3] whylogs[mlflow] whylogs[whylabs] +pandas From 5e73956dd99f9b6d46f40eb2a0aa1532883232ce Mon Sep 17 00:00:00 2001 From: Nikki Everett Date: Fri, 4 Oct 2024 18:14:26 -0500 Subject: [PATCH 07/28] Add fake DocSearch API key to get monodocs build to succeed (#1740) add fake docsearch api key to get monodocs build to succeed Signed-off-by: nikki everett --- .github/workflows/monodocs_build.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/monodocs_build.yml b/.github/workflows/monodocs_build.yml index 7460e7bcf..5c4907e22 100644 --- a/.github/workflows/monodocs_build.yml +++ b/.github/workflows/monodocs_build.yml @@ -65,6 +65,7 @@ jobs: shell: bash -el {0} env: FLYTESNACKS_LOCAL_PATH: ${{ github.workspace }}/flytesnacks + DOCSEARCH_API_KEY: fake_docsearch_api_key # must be set to get doc build to succeed run: | conda activate monodocs-env make -C docs html SPHINXOPTS="-W" From 568754edbf318c33c5bd696309f373a0bdab2f84 Mon Sep 17 00:00:00 2001 From: "Thomas J. Fan" Date: Mon, 7 Oct 2024 12:50:53 -0400 Subject: [PATCH 08/28] Adds neptune plugin example (#1723) * Adds neptune plugin example Signed-off-by: Thomas J. 
Fan * Adds neptune to index Signed-off-by: Thomas J. Fan * Address comments Signed-off-by: Thomas J. Fan * Adds namespace Signed-off-by: Thomas J. Fan * Fix neptune Signed-off-by: Thomas J. Fan * Address comments Signed-off-by: Thomas J. Fan * Adds neptune to doctree Signed-off-by: Thomas J. Fan --------- Signed-off-by: Thomas J. Fan --- docs/integrations/index.md | 3 + examples/neptune_plugin/Dockerfile | 24 +++ examples/neptune_plugin/README.md | 47 ++++++ .../neptune_plugin/neptune_plugin/__init__.py | 0 .../neptune_plugin/neptune_example.py | 153 ++++++++++++++++++ examples/neptune_plugin/requirements.in | 7 + 6 files changed, 234 insertions(+) create mode 100644 examples/neptune_plugin/Dockerfile create mode 100644 examples/neptune_plugin/README.md create mode 100644 examples/neptune_plugin/neptune_plugin/__init__.py create mode 100644 examples/neptune_plugin/neptune_plugin/neptune_example.py create mode 100644 examples/neptune_plugin/requirements.in diff --git a/docs/integrations/index.md b/docs/integrations/index.md index b1cd315ac..34da66778 100644 --- a/docs/integrations/index.md +++ b/docs/integrations/index.md @@ -42,6 +42,8 @@ Flytekit functionality. These plugins can be anything and for comparison can be - Run analytical queries using DuckDB. * - {doc}`Weights and Biases ` - `wandb`: Machine learning platform to build better models faster. +* - {doc}`Neptune ` + - `neptune`: Neptune is the MLOps stack component for experiment tracking. * - {doc}`NIM ` - Serve optimized model containers with NIM. * - {doc}`Ollama ` @@ -214,6 +216,7 @@ constructs natively within other orchestration tools. 
/auto_examples/mmcloud_agent/index /auto_examples/modin_plugin/index /auto_examples/kfmpi_plugin/index +/auto_examples/neptune_plugin/index /auto_examples/nim_plugin/index /auto_examples/ollama_plugin/index /auto_examples/onnx_plugin/index diff --git a/examples/neptune_plugin/Dockerfile b/examples/neptune_plugin/Dockerfile new file mode 100644 index 000000000..80482086b --- /dev/null +++ b/examples/neptune_plugin/Dockerfile @@ -0,0 +1,24 @@ +FROM python:3.11-slim-bookworm +LABEL org.opencontainers.image.source https://github.com/flyteorg/flytesnacks + +WORKDIR /root +ENV VENV /opt/venv +ENV LANG C.UTF-8 +ENV LC_ALL C.UTF-8 +ENV PYTHONPATH /root + +# Virtual environment +RUN python3 -m venv ${VENV} +ENV PATH="${VENV}/bin:$PATH" + +# Install Python dependencies +COPY requirements.in /root +RUN pip install --no-cache-dir -r /root/requirements.in + +# Copy the actual code +COPY . /root + +# This tag is supplied by the build script and will be used to determine the version +# when registering tasks, workflows, and launch plans +ARG tag +ENV FLYTE_INTERNAL_IMAGE $tag diff --git a/examples/neptune_plugin/README.md b/examples/neptune_plugin/README.md new file mode 100644 index 000000000..5e6e321e3 --- /dev/null +++ b/examples/neptune_plugin/README.md @@ -0,0 +1,47 @@ +(neptune)= + +# Neptune + +```{eval-rst} +.. tags:: Integration, Data, Metrics, Intermediate +``` + +[Neptune](https://neptune.ai/) is the MLOps stack component for experiment tracking. It offers a single place to log, compare, store, and collaborate on experiments and models. This plugin enables seamless use of Neptune within Flyte by configuring links between the two platforms. You can find more information about how to use Neptune in their [documentation](https://docs.neptune.ai/). + +## Installation + +To install the Flyte Neptune plugin, , run the following command: + +```bash +pip install flytekitplugins-neptune +``` + +## Example usage + +For a usage example, see the {doc}`Neptune example `. 
+ +## Local testing + +To run {doc}`Neptune example ` locally: + +1. Create an account on [Neptune](https://neptune.ai/). +2. Create a project on Neptune. +3. In the example, set `NEPTUNE_PROJECT` to your project name. +4. Add a secret using [Flyte's Secrets manager](https://docs.flyte.org/en/latest/user_guide/productionizing/secrets.html) with `key="neptune-api-token"` and `group="neptune-api-group"` +5. If you want to see the dynamic log links in the UI, then add the configuration in the next section. + +## Flyte deployment configuration + +To enable dynamic log links, add the plugin to Flyte's configuration file: +```yaml +plugins: + logs: + dynamic-log-links: + - neptune-run-id: + displayName: Neptune + templateUris: "{{ .taskConfig.host }}/{{ .taskConfig.project }}?query=(%60flyte%2Fexecution_id%60%3Astring%20%3D%20%22{{ .executionName }}-{{ .nodeId }}-{{ .taskRetryAttempt }}%22)&lbViewUnpacked=true" +``` + +```{auto-examples-toc} +neptune_example +``` diff --git a/examples/neptune_plugin/neptune_plugin/__init__.py b/examples/neptune_plugin/neptune_plugin/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/examples/neptune_plugin/neptune_plugin/neptune_example.py b/examples/neptune_plugin/neptune_plugin/neptune_example.py new file mode 100644 index 000000000..84638cfe5 --- /dev/null +++ b/examples/neptune_plugin/neptune_plugin/neptune_example.py @@ -0,0 +1,153 @@ +# %% [markdown] +# (neptune_example)= +# +# # Neptune example +# Neptune is the MLOps stack component for experiment tracking. It offers a single place +# to log, compare, store, and collaborate on experiments and models. This plugin +# enables seamless use of Neptune within Flyte by configuring links between the +# two platforms. In this example, we learn how to scale up training multiple +# XGBoost models and use Neptune for tracking. 
+# %% +from typing import List, Tuple + +import numpy as np +from flytekit import ( + ImageSpec, + Resources, + Secret, + current_context, + dynamic, + task, + workflow, +) +from flytekitplugins.neptune import neptune_init_run + +# %% [markdown] +# First, we specify the Neptune project that was created on Neptune's platform. +# Please update `NEPTUNE_PROJECT` to the value associated with your account. +NEPTUNE_PROJECT = "username/project" + +# %% [markdown] +# Neptune requires an API key to authenticate with their service. In the above example, +# the secret is created using +# [Flyte's Secrets manager](https://docs.flyte.org/en/latest/user_guide/productionizing/secrets.html). +api_key = Secret(key="neptune-api-token", group="neptune-api-group") + +# %% [markdown] +# Next, we use `ImageSpec` to construct a container with the dependencies for our +# XGBoost training task. Please set the `REGISTRY` to a registry that your cluster can access; +REGISTRY = "localhost:30000" + +image = ImageSpec( + name="flytekit-xgboost", + packages=[ + "neptune", + "neptune-xgboost", + "flytekitplugins-neptune", + "scikit-learn==1.5.1", + "numpy==1.26.1", + "matplotlib==3.9.2", + ], + builder="default", + registry=REGISTRY, +) + + +# %% +# First, we use a task to download the dataset and cache the data in Flyte: +@task( + container_image=image, + cache=True, + cache_version="v2", + requests=Resources(cpu="2", mem="2Gi"), +) +def get_dataset() -> Tuple[np.ndarray, np.ndarray]: + from sklearn.datasets import fetch_california_housing + + X, y = fetch_california_housing(return_X_y=True, as_frame=False) + return X, y + + +# %% +# Next, we use the `neptune_init_run` decorator to configure Flyte to train an XGBoost +# model. The decorator requires an `api_key` secret to authenticate with Neptune and +# the task definition needs to request the same `api_key` secret. 
In the training +# function, the [Neptune run object](https://docs.neptune.ai/api/run/) is accessible +# through `current_context().neptune_run`, which is frequently used +# in Neptune's integrations. In this example, we pass the `Run` object into Neptune's +# XGBoost callback. +@task( + container_image=image, + secret_requests=[api_key], + requests=Resources(cpu="2", mem="4Gi"), +) +@neptune_init_run(project=NEPTUNE_PROJECT, secret=api_key) +def train_model(max_depth: int, X: np.ndarray, y: np.ndarray): + import xgboost as xgb + from neptune.integrations.xgboost import NeptuneCallback + from sklearn.model_selection import train_test_split + + X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=123) + dtrain = xgb.DMatrix(X_train, label=y_train) + dval = xgb.DMatrix(X_test, label=y_test) + + ctx = current_context() + run = ctx.neptune_run + neptune_callback = NeptuneCallback(run=run) + + model_params = { + "tree_method": "hist", + "eta": 0.7, + "gamma": 0.001, + "max_depth": max_depth, + "objective": "reg:squarederror", + "eval_metric": ["mae", "rmse"], + } + evals = [(dtrain, "train"), (dval, "valid")] + + # Train the model and log metadata to the run in Neptune + xgb.train( + params=model_params, + dtrain=dtrain, + num_boost_round=57, + evals=evals, + callbacks=[ + neptune_callback, + xgb.callback.LearningRateScheduler(lambda epoch: 0.99**epoch), + xgb.callback.EarlyStopping(rounds=30), + ], + ) + + +# %% +# With Flyte's dynamic workflows, we can scale up multiple training jobs with different +# `max_depths`: +@dynamic(container_image=image) +def train_multiple_models(max_depths: List[int], X: np.ndarray, y: np.ndarray): + for max_depth in max_depths: + train_model(max_depth=max_depth, X=X, y=y) + + +@workflow +def train_wf(max_depths: List[int] = [2, 4, 10]): + X, y = get_dataset() + train_multiple_models(max_depths=max_depths, X=X, y=y) + + +# %% +# To run this workflow on a remote Flyte cluster run: +# ```bash +# union run 
--remote neptune_example.py train_wf +# ``` + + +# %% [markdown] +# To enable dynamic log links, add the plugin to Flyte's configuration file: +# ```yaml +# plugins: +# logs: +# dynamic-log-links: +# - neptune-run-id: +# displayName: Neptune +# templateUris: "{{ .taskConfig.host }}/{{ .taskConfig.project }}?query=(%60flyte%2Fexecution_id%60%3Astring%20%3D%20%22{{ .executionName }}-{{ .nodeId }}-{{ .taskRetryAttempt }}%22)&lbViewUnpacked=true" +# ``` diff --git a/examples/neptune_plugin/requirements.in b/examples/neptune_plugin/requirements.in new file mode 100644 index 000000000..878f4ef4e --- /dev/null +++ b/examples/neptune_plugin/requirements.in @@ -0,0 +1,7 @@ +flytekitplugins-neptune +xgboost +neptune +neptune-xgboost +scikit-learn==1.5.1 +numpy==1.26.1 +matplotlib==3.9.2 From d6a986161ff17dc2de46eb7d5dcea7b60660cfa8 Mon Sep 17 00:00:00 2001 From: Niels Bantilan Date: Mon, 7 Oct 2024 16:46:58 -0400 Subject: [PATCH 09/28] Update neptune_example.py (#1743) * Update neptune_example.py This PR fixes some of the formatting issues in the neptune plugin example Signed-off-by: Niels Bantilan * Update neptune_example.py Signed-off-by: Niels Bantilan * make linter happy Signed-off-by: Niels Bantilan --------- Signed-off-by: Niels Bantilan --- .../neptune_plugin/neptune_example.py | 23 +++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/examples/neptune_plugin/neptune_plugin/neptune_example.py b/examples/neptune_plugin/neptune_plugin/neptune_example.py index 84638cfe5..b6a7949b8 100644 --- a/examples/neptune_plugin/neptune_plugin/neptune_example.py +++ b/examples/neptune_plugin/neptune_plugin/neptune_example.py @@ -25,17 +25,23 @@ # %% [markdown] # First, we specify the Neptune project that was created on Neptune's platform. # Please update `NEPTUNE_PROJECT` to the value associated with your account. + +# %% NEPTUNE_PROJECT = "username/project" # %% [markdown] # Neptune requires an API key to authenticate with their service. 
In the above example, # the secret is created using # [Flyte's Secrets manager](https://docs.flyte.org/en/latest/user_guide/productionizing/secrets.html). + +# %% api_key = Secret(key="neptune-api-token", group="neptune-api-group") # %% [markdown] # Next, we use `ImageSpec` to construct a container with the dependencies for our # XGBoost training task. Please set the `REGISTRY` to a registry that your cluster can access; + +# %% REGISTRY = "localhost:30000" image = ImageSpec( @@ -53,8 +59,11 @@ ) -# %% +# %% [markdown] # First, we use a task to download the dataset and cache the data in Flyte: + + +# %% @task( container_image=image, cache=True, @@ -68,7 +77,7 @@ def get_dataset() -> Tuple[np.ndarray, np.ndarray]: return X, y -# %% +# %% [markdown] # Next, we use the `neptune_init_run` decorator to configure Flyte to train an XGBoost # model. The decorator requires an `api_key` secret to authenticate with Neptune and # the task definition needs to request the same `api_key` secret. In the training @@ -76,6 +85,9 @@ def get_dataset() -> Tuple[np.ndarray, np.ndarray]: # through `current_context().neptune_run`, which is frequently used # in Neptune's integrations. In this example, we pass the `Run` object into Neptune's # XGBoost callback. 
+ + +# %% @task( container_image=image, secret_requests=[api_key], @@ -119,9 +131,12 @@ def train_model(max_depth: int, X: np.ndarray, y: np.ndarray): ) -# %% +# %% [markdown] # With Flyte's dynamic workflows, we can scale up multiple training jobs with different # `max_depths`: + + +# %% @dynamic(container_image=image) def train_multiple_models(max_depths: List[int], X: np.ndarray, y: np.ndarray): for max_depth in max_depths: @@ -134,7 +149,7 @@ def train_wf(max_depths: List[int] = [2, 4, 10]): train_multiple_models(max_depths=max_depths, X=X, y=y) -# %% +# %% [markdown] # To run this workflow on a remote Flyte cluster run: # ```bash # union run --remote neptune_example.py train_wf From 6c179d8d44fc7254d9ac52fed59d3835d2af0f12 Mon Sep 17 00:00:00 2001 From: Nikki Everett Date: Tue, 8 Oct 2024 12:43:17 -0500 Subject: [PATCH 10/28] Use updated LABEL key=value format for wandb Dockerfile (#1745) use updated LABEL key/value format Signed-off-by: nikki everett --- examples/wandb_plugin/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/wandb_plugin/Dockerfile b/examples/wandb_plugin/Dockerfile index 4e04f77bd..65da827a3 100644 --- a/examples/wandb_plugin/Dockerfile +++ b/examples/wandb_plugin/Dockerfile @@ -1,5 +1,5 @@ FROM python:3.11-slim-bookworm -LABEL org.opencontainers.image.source https://github.com/flyteorg/flytesnacks +LABEL org.opencontainers.image.source=https://github.com/flyteorg/flytesnacks WORKDIR /root ENV VENV /opt/venv From 846ad8b384df366c5dcc16330877f80b71f294f7 Mon Sep 17 00:00:00 2001 From: "Thomas J. Fan" Date: Tue, 8 Oct 2024 15:03:14 -0400 Subject: [PATCH 11/28] Update neptune description (#1744) Signed-off-by: Thomas J. 
Fan --- examples/neptune_plugin/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/neptune_plugin/README.md b/examples/neptune_plugin/README.md index 5e6e321e3..bbf76fbf4 100644 --- a/examples/neptune_plugin/README.md +++ b/examples/neptune_plugin/README.md @@ -6,7 +6,7 @@ .. tags:: Integration, Data, Metrics, Intermediate ``` -[Neptune](https://neptune.ai/) is the MLOps stack component for experiment tracking. It offers a single place to log, compare, store, and collaborate on experiments and models. This plugin enables seamless use of Neptune within Flyte by configuring links between the two platforms. You can find more information about how to use Neptune in their [documentation](https://docs.neptune.ai/). +[Neptune](https://neptune.ai/) is an experiment tracker for large-scale model training. It allows AI researchers to monitor their model training in real time, visualize and compare experiments, and collaborate on them with a team. This plugin enables seamless use of Neptune within Flyte by configuring links between the two platforms. You can find more information about how to use Neptune in their [documentation](https://docs.neptune.ai/). 
## Installation From 3778c448d65ca76d385da6489799120134d3a23e Mon Sep 17 00:00:00 2001 From: Future-Outlier Date: Wed, 9 Oct 2024 23:47:39 +0800 Subject: [PATCH 12/28] Use Pure Dataclass In Example (#1747) Signed-off-by: Future-Outlier --- examples/data_types_and_io/data_types_and_io/dataclass.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/examples/data_types_and_io/data_types_and_io/dataclass.py b/examples/data_types_and_io/data_types_and_io/dataclass.py index 3fac38f0d..44c73e4f9 100644 --- a/examples/data_types_and_io/data_types_and_io/dataclass.py +++ b/examples/data_types_and_io/data_types_and_io/dataclass.py @@ -7,7 +7,6 @@ from flytekit.types.directory import FlyteDirectory from flytekit.types.file import FlyteFile from flytekit.types.structured import StructuredDataset -from mashumaro.mixins.json import DataClassJSONMixin # NOTE: If you're using Flytekit version below v1.10, you'll need to decorate with `@dataclass_json` using # `from dataclass_json import dataclass_json` instead of inheriting from Mashumaro's `DataClassJSONMixin`. 
@@ -23,7 +22,7 @@ # Python types # Define a `dataclass` with `int`, `str` and `dict` as the data types @dataclass -class Datum(DataClassJSONMixin): +class Datum: x: int y: str z: dict[int, str] @@ -50,7 +49,7 @@ def add(x: Datum, y: Datum) -> Datum: # Flyte types @dataclass -class FlyteTypes(DataClassJSONMixin): +class FlyteTypes: dataframe: StructuredDataset file: FlyteFile directory: FlyteDirectory From fd33533cf78ecab2fbdc0ec49f39cf63c114b0e3 Mon Sep 17 00:00:00 2001 From: Raghav Mangla <97332401+RaghavMangla@users.noreply.github.com> Date: Mon, 14 Oct 2024 18:05:16 +0530 Subject: [PATCH 13/28] Pr 1741 (#1742) * issue fixed:document the process of rendering a notebooktask in a flyte deck Signed-off-by: RaghavMangla * fixed Signed-off-by: RaghavMangla --------- Signed-off-by: RaghavMangla --- examples/papermill_plugin/papermill_plugin/simple.py | 1 + 1 file changed, 1 insertion(+) diff --git a/examples/papermill_plugin/papermill_plugin/simple.py b/examples/papermill_plugin/papermill_plugin/simple.py index 9d4536561..90b642b9b 100644 --- a/examples/papermill_plugin/papermill_plugin/simple.py +++ b/examples/papermill_plugin/papermill_plugin/simple.py @@ -32,6 +32,7 @@ name="simple-nb", notebook_path=str(pathlib.Path(__file__).parent.absolute() / "nb_simple.ipynb"), render_deck=True, + enable_deck=True, inputs=kwtypes(v=float), outputs=kwtypes(square=float), ) From 1be10819a5168cecdc35b6826bbefcaaff814675 Mon Sep 17 00:00:00 2001 From: Nikki Everett Date: Mon, 14 Oct 2024 10:29:26 -0500 Subject: [PATCH 14/28] DOC-648 flytesnacks edits needed for Neptune and W&B flytekit plugins documentation (#1748) * update refs Signed-off-by: nikki everett * clean up integrations information architecture Signed-off-by: nikki everett * fix databricks agent title Signed-off-by: nikki everett * move k8s pod plugin to deprecated integrations section and add deprecation notice Signed-off-by: nikki everett --------- Signed-off-by: nikki everett --- .../deprecated_integrations/index.md | 
10 - docs/integrations/index.md | 238 +++++++++++------- examples/databricks_agent/README.md | 2 +- examples/k8s_pod_plugin/README.md | 4 + examples/neptune_plugin/README.md | 6 +- examples/wandb_plugin/README.md | 2 +- 6 files changed, 150 insertions(+), 112 deletions(-) delete mode 100644 docs/integrations/deprecated_integrations/index.md diff --git a/docs/integrations/deprecated_integrations/index.md b/docs/integrations/deprecated_integrations/index.md deleted file mode 100644 index ff5c12e2a..000000000 --- a/docs/integrations/deprecated_integrations/index.md +++ /dev/null @@ -1,10 +0,0 @@ -# Deprecated integrations - -```{toctree} -:maxdepth: -1 -:hidden: - -BigQuery plugin -Databricks plugin -Snowflake plugin -``` diff --git a/docs/integrations/index.md b/docs/integrations/index.md index 34da66778..283ac34d7 100644 --- a/docs/integrations/index.md +++ b/docs/integrations/index.md @@ -8,83 +8,79 @@ Flyte is designed to be highly extensible and can be customized in multiple ways Want to contribute an example? Check out the {ref}`Documentation contribution guide `. ``` -## Flytekit Plugins +## Flytekit plugins -Flytekit plugins are simple plugins that can be implemented purely in python, unit tested locally and allow extending -Flytekit functionality. These plugins can be anything and for comparison can be thought of like -[Airflow Operators](https://airflow.apache.org/docs/apache-airflow/stable/howto/operator/index.html). +Flytekit plugins can be implemented purely in Python, unit tested locally, and allow extending +Flytekit functionality. For comparison, these plugins can be thought of like +[Airflow operators](https://airflow.apache.org/docs/apache-airflow/stable/howto/operator/index.html). ```{list-table} :header-rows: 0 :widths: 20 30 -* - {doc}`SQL ` - - Execute SQL queries as tasks. -* - {doc}`Great Expectations ` - - Validate data with `great_expectations`. -* - {doc}`Papermill ` - - Execute Jupyter Notebooks with `papermill`. 
-* - {doc}`Pandera ` - - Validate pandas dataframes with `pandera`. -* - {doc}`Modin ` - - Scale pandas workflows with `modin`. -* - {doc}`Dolt ` - - Version your SQL database with `dolt`. * - {doc}`DBT ` - Run and test your `dbt` pipelines in Flyte. -* - {doc}`WhyLogs ` - - `whylogs`: the open standard for data logging. -* - {doc}`MLFlow ` - - `mlflow`: the open standard for model tracking. -* - {doc}`ONNX ` - - Convert ML models to ONNX models seamlessly. +* - {doc}`Dolt ` + - Version your SQL database with `dolt`. * - {doc}`DuckDB ` - Run analytical queries using DuckDB. -* - {doc}`Weights and Biases ` - - `wandb`: Machine learning platform to build better models faster. +* - {doc}`Great Expectations ` + - Validate data with `great_expectations`. +* - {doc}`MLFlow ` + - `mlflow`: the open standard for model tracking. +* - {doc}`Modin ` + - Scale pandas workflows with `modin`. * - {doc}`Neptune ` - `neptune`: Neptune is the MLOps stack component for experiment tracking. * - {doc}`NIM ` - Serve optimized model containers with NIM. * - {doc}`Ollama ` - Serve fine-tuned LLMs with Ollama in a Flyte workflow. +* - {doc}`ONNX ` + - Convert ML models to ONNX models seamlessly. +* - {doc}`Pandera ` + - Validate pandas dataframes with `pandera`. +* - {doc}`Papermill ` + - Execute Jupyter Notebooks with `papermill`. +* - {doc}`SQL ` + - Execute SQL queries as tasks. +* - {doc}`Weights and Biases ` + - `wandb`: Machine learning platform to build better models faster. +* - {doc}`WhyLogs ` + - `whylogs`: the open standard for data logging. ``` -:::{dropdown} {fa}`info-circle` Using flytekit plugins +:::{dropdown} {fa}`info-circle` Using Flytekit plugins :animate: fade-in-slide-down -Data is automatically marshalled and unmarshalled in and out of the plugin. Users should mostly implement the -{py:class}`~flytekit.core.base_task.PythonTask` API defined in Flytekit. +Data is automatically marshalled and unmarshalled in and out of the plugin. 
Users should mostly implement the {py:class}`~flytekit.core.base_task.PythonTask` API defined in Flytekit. -Flytekit Plugins are lazily loaded and can be released independently like libraries. We follow a convention to name the -plugin like `flytekitplugins-*`, where `*` indicates the package to be integrated into Flytekit. For example -`flytekitplugins-papermill` enables users to author Flytekit tasks using [Papermill](https://papermill.readthedocs.io/en/latest/). +Flytekit plugins are lazily loaded and can be released independently like libraries. The naming convention is `flytekitplugins-*`, where `*` indicates the package to be integrated into Flytekit. For example, `flytekitplugins-papermill` enables users to author Flytekit tasks using [Papermill](https://papermill.readthedocs.io/en/latest/). You can find the plugins maintained by the core Flyte team [here](https://github.com/flyteorg/flytekit/tree/master/plugins). ::: -## Native Backend Plugins +## Native backend plugins -Native Backend Plugins are the plugins that can be executed without any external service dependencies because the compute is -orchestrated by Flyte itself, within its provisioned Kubernetes clusters. +Native backend plugins can be executed without any external service dependencies because the compute is orchestrated by Flyte itself, within its provisioned Kubernetes clusters. ```{list-table} :header-rows: 0 :widths: 20 30 -* - {doc}`K8s Pods ` - - Execute K8s pods for arbitrary workloads. -* - {doc}`K8s Cluster Dask Jobs ` - - Run Dask jobs on a K8s Cluster. -* - {doc}`K8s Cluster Spark Jobs ` - - Run Spark jobs on a K8s Cluster. * - {doc}`Kubeflow PyTorch ` - Run distributed PyTorch training jobs using `Kubeflow`. * - {doc}`Kubeflow TensorFlow ` - Run distributed TensorFlow training jobs using `Kubeflow`. +* - {doc}`Kubernetes pods ` + - Execute Kubernetes pods for arbitrary workloads. +* - {doc}`Kubernetes cluster Dask jobs ` + - Run Dask jobs on a Kubernetes Cluster. 
+* - {doc}`Kubernetes cluster Spark jobs ` + - Run Spark jobs on a Kubernetes Cluster. * - {doc}`MPI Operator ` - Run distributed deep learning training jobs using Horovod and MPI. -* - {doc}`Ray Task ` +* - {doc}`Ray ` - Run Ray jobs on a K8s Cluster. ``` @@ -98,54 +94,53 @@ orchestrated by Flyte itself, within its provisioned Kubernetes clusters. :header-rows: 0 :widths: 20 30 +* - {doc}`AWS SageMaker Inference agent ` + - Deploy models and create, as well as trigger inference endpoints on AWS SageMaker. * - {doc}`Airflow agent ` - Run Airflow jobs in your workflows with the Airflow agent. * - {doc}`BigQuery agent ` - Run BigQuery jobs in your workflows with the BigQuery agent. * - {doc}`ChatGPT agent ` - Run ChatGPT jobs in your workflows with the ChatGPT agent. -* - {doc}`Databricks ` +* - {doc}`Databricks agent ` - Run Databricks jobs in your workflows with the Databricks agent. -* - {doc}`Memory Machine Cloud ` +* - {doc}`Memory Machine Cloud agent ` - Execute tasks using the MemVerge Memory Machine Cloud agent. * - {doc}`OpenAI Batch ` - Submit requests for asynchronous batch processing on OpenAI. -* - {doc}`SageMaker Inference ` - - Deploy models and create, as well as trigger inference endpoints on SageMaker. -* - {doc}`Sensor ` +* - {doc}`Sensor agent ` - Run sensor jobs in your workflows with the sensor agent. -* - {doc}`Snowflake ` +* - {doc}`Snowflake agent ` - Run Snowflake jobs in your workflows with the Snowflake agent. ``` (external_service_backend_plugins)= -## External Service Backend Plugins +## External service backend plugins -As the term suggests, external service backend plugins rely on external services like -[Hive](https://docs.qubole.com/en/latest/user-guide/engines/hive/index.html) for handling the workload defined in the Flyte task that uses the respective plugin. +As the term suggests, these plugins rely on external services to handle the workload defined in the Flyte task that uses the plugin. 
```{list-table} :header-rows: 0 :widths: 20 30 -* - {doc}`AWS Athena plugin ` +* - {doc}`AWS Athena ` - Execute queries using AWS Athena -* - {doc}`AWS Batch plugin ` +* - {doc}`AWS Batch ` - Running tasks and workflows on AWS batch service * - {doc}`Flyte Interactive ` - Execute tasks using Flyte Interactive to debug. -* - {doc}`Hive plugin ` +* - {doc}`Hive ` - Run Hive jobs in your workflows. ``` (enable-backend-plugins)= -::::{dropdown} {fa}`info-circle` Enabling Backend Plugins +::::{dropdown} {fa}`info-circle` Enabling backend plugins :animate: fade-in-slide-down -To enable a backend plugin you have to add the `ID` of the plugin to the enabled plugins list. The `enabled-plugins` is available under the `tasks > task-plugins` section of FlytePropeller's configuration. -The plugin configuration structure is defined [here](https://pkg.go.dev/github.com/flyteorg/flytepropeller@v0.6.1/pkg/controller/nodes/task/config#TaskPluginConfig). An example of the config follows, +To enable a backend plugin, you must add the `ID` of the plugin to the enabled plugins list. The `enabled-plugins` is available under the `tasks > task-plugins` section of FlytePropeller's configuration. +The plugin configuration structure is defined [here](https://pkg.go.dev/github.com/flyteorg/flytepropeller@v0.6.1/pkg/controller/nodes/task/config#TaskPluginConfig). An example of the config follows: ```yaml tasks: @@ -160,15 +155,15 @@ tasks: container_array: k8s-array ``` -**Finding the `ID` of the Backend Plugin** +**Finding the `ID` of the backend plugin** -This is a little tricky since you have to look at the source code of the plugin to figure out the `ID`. In the case of Spark, for example, the value of `ID` is used [here](https://github.com/flyteorg/flyteplugins/blob/v0.5.25/go/tasks/plugins/k8s/spark/spark.go#L424) here, defined as [spark](https://github.com/flyteorg/flyteplugins/blob/v0.5.25/go/tasks/plugins/k8s/spark/spark.go#L41). 
+To find the `ID` of the backend plugin, look at the source code of the plugin. For example, in the case of Spark, the value of `ID` is used [here](https://github.com/flyteorg/flyteplugins/blob/v0.5.25/go/tasks/plugins/k8s/spark/spark.go#L424), defined as [spark](https://github.com/flyteorg/flyteplugins/blob/v0.5.25/go/tasks/plugins/k8s/spark/spark.go#L41). :::: -## SDKs for Writing Tasks and Workflows +## SDKs for writing tasks and workflows -The {ref}`community ` would love to help you with your own ideas of building a new SDK. Currently the available SDKs are: +The {ref}`community ` would love to help you build new SDKs. Currently, the available SDKs are: ```{list-table} :header-rows: 0 @@ -180,7 +175,7 @@ The {ref}`community ` would love to help you with your own ideas of b - The Java/Scala SDK for Flyte. ``` -## Flyte Operators +## Flyte operators Flyte can be integrated with other orchestrators to help you leverage Flyte's constructs natively within other orchestration tools. 
```{toctree} :maxdepth: -1 :hidden: +:caption: Flytekit plugins + +DBT +Dolt +DuckDB +Great Expectations +MLFlow +Modin +Neptune +NIM +Ollama +ONNX +Pandera +Papermill +SQL +Weights & Biases +WhyLogs +``` + +```{toctree} +:maxdepth: -1 +:hidden: +:caption: Native backend plugins + +Kubeflow PyTorch +Kubeflow TensorFlow +Kubernetes cluster Dask jobs +Kubernetes cluster Spark jobs +MPI Operator +Ray +``` + +```{toctree} +:maxdepth: -1 +:hidden: +:caption: Flyte agents + +Airflow agent +AWS Sagemaker inference agent +BigQuery agent +ChatGPT agent +Databricks agent +Memory Machine Cloud agent +OpenAI batch agent +Sensor agent +Snowflake agent +``` + +```{toctree} +:maxdepth: -1 +:hidden: +:caption: External service backend plugins + +AWS Athena +AWS Batch +Flyte Interactive +Hive + +``` + +```{toctree} +:maxdepth: -1 +:hidden: +:caption: SDKs for writing tasks and workflows + +flytekit +flytekit-java + +``` + +```{toctree} +:maxdepth: -1 +:hidden: +:caption: Flyte operators + +Airflow +``` + +```{toctree} +:maxdepth: -1 +:hidden: +:caption: Deprecated integrations -/auto_examples/airflow_agent/index -/auto_examples/airflow_plugin/index -/auto_examples/athena_plugin/index -/auto_examples/aws_batch_plugin/index -/auto_examples/bigquery_agent/index -/auto_examples/chatgpt_agent/index -/auto_examples/k8s_dask_plugin/index -/auto_examples/databricks_agent/index -/auto_examples/dbt_plugin/index -/auto_examples/dolt_plugin/index -/auto_examples/duckdb_plugin/index -/auto_examples/flyteinteractive_plugin/index -/auto_examples/greatexpectations_plugin/index -/auto_examples/hive_plugin/index -/auto_examples/k8s_pod_plugin/index -/auto_examples/mlflow_plugin/index -/auto_examples/mmcloud_agent/index -/auto_examples/modin_plugin/index -/auto_examples/kfmpi_plugin/index -/auto_examples/neptune_plugin/index -/auto_examples/nim_plugin/index -/auto_examples/ollama_plugin/index -/auto_examples/onnx_plugin/index -/auto_examples/openai_batch_agent/index 
-/auto_examples/papermill_plugin/index -/auto_examples/pandera_plugin/index -/auto_examples/kfpytorch_plugin/index -/auto_examples/ray_plugin/index -/auto_examples/sagemaker_inference_agent/index -/auto_examples/sensor/index -/auto_examples/snowflake_agent/index -/auto_examples/k8s_spark_plugin/index -/auto_examples/sql_plugin/index -/auto_examples/kftensorflow_plugin/index -/auto_examples/wandb_plugin/index -/auto_examples/whylogs_plugin/index -Deprecated integrations +BigQuery plugin +Databricks plugin +Kubernetes pods +Snowflake plugin ``` diff --git a/examples/databricks_agent/README.md b/examples/databricks_agent/README.md index 6d6f4e754..0c1148926 100644 --- a/examples/databricks_agent/README.md +++ b/examples/databricks_agent/README.md @@ -1,6 +1,6 @@ (databricks_agent)= -# Databricks agent example +# Databricks agent ```{eval-rst} .. tags:: Spark, Integration, DistributedComputing, Data, Advanced diff --git a/examples/k8s_pod_plugin/README.md b/examples/k8s_pod_plugin/README.md index 4a1646be5..2f18928dd 100644 --- a/examples/k8s_pod_plugin/README.md +++ b/examples/k8s_pod_plugin/README.md @@ -4,6 +4,10 @@ .. tags:: Integration, Kubernetes, Advanced ``` +```{important} +This plugin is no longer needed and is here only for backwards compatibility. No new versions will be published after v1.13.x. Please use the `pod_template` and `pod_template_name` arguments to `@task` as described in the {ref}`Kubernetes task pod configuration guide ` instead. +``` + Flyte tasks, represented by the {py:func}`@task ` decorator, are essentially single functions that run in one container. 
However, there may be situations where you need to run a job with more than one container or require additional capabilities, such as: diff --git a/examples/neptune_plugin/README.md b/examples/neptune_plugin/README.md index bbf76fbf4..67c4d845d 100644 --- a/examples/neptune_plugin/README.md +++ b/examples/neptune_plugin/README.md @@ -1,6 +1,6 @@ -(neptune)= +(neptune_plugin)= -# Neptune +# Neptune plugin ```{eval-rst} .. tags:: Integration, Data, Metrics, Intermediate @@ -10,7 +10,7 @@ ## Installation -To install the Flyte Neptune plugin, , run the following command: +To install the Flyte Neptune plugin, run the following command: ```bash pip install flytekitplugins-neptune diff --git a/examples/wandb_plugin/README.md b/examples/wandb_plugin/README.md index 53c3ddfd4..b5f1da584 100644 --- a/examples/wandb_plugin/README.md +++ b/examples/wandb_plugin/README.md @@ -1,4 +1,4 @@ -(wandb)= +(wandb_plugin)= # Weights and Biases From 1c55995e76f46dfb88f353547892f025b0413d07 Mon Sep 17 00:00:00 2001 From: Nikki Everett Date: Tue, 15 Oct 2024 11:01:46 -0500 Subject: [PATCH 15/28] Update contributing guide links (#1753) update contributing guide links Signed-off-by: nikki everett --- docs/integrations/index.md | 2 +- docs/tutorials/index.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/integrations/index.md b/docs/integrations/index.md index 283ac34d7..c4e056c84 100644 --- a/docs/integrations/index.md +++ b/docs/integrations/index.md @@ -5,7 +5,7 @@ Flyte is designed to be highly extensible and can be customized in multiple ways. ```{note} -Want to contribute an example? Check out the {ref}`Documentation contribution guide `. +Want to contribute an integration example? Check out the {ref}`Tutorials and integration examples contribution guide `. 
``` ## Flytekit plugins diff --git a/docs/tutorials/index.md b/docs/tutorials/index.md index 38aaa5b91..dc7b3b369 100644 --- a/docs/tutorials/index.md +++ b/docs/tutorials/index.md @@ -12,7 +12,7 @@ contributing samples easy. If this is your first time running these examples, fo {ref}`setup guide ` to get started. ```{note} -Want to contribute an example? Check out the [Documentation contribution guide](https://docs.flyte.org/en/latest/flytesnacks/contribute.html). +Want to contribute a tutorial? Check out the {ref}`Tutorials and integration examples contribution guide `. ``` ## 🤖 Model Training From c8fa39e8db2a188eff23e5f388fcf9ffed7ad7ba Mon Sep 17 00:00:00 2001 From: Omar Tarabai Date: Thu, 17 Oct 2024 02:23:51 +0200 Subject: [PATCH 16/28] Add PERIAN Job Platform Agent example (#1746) Add PERIAN Job Platform example Signed-off-by: Omar Tarabai --- _example_template/Dockerfile | 2 +- docs/integrations/index.md | 3 ++ examples/perian_agent/Dockerfile | 31 ++++++++++++++ examples/perian_agent/README.md | 21 ++++++++++ .../perian_agent/perian_agent/__init__.py | 0 examples/perian_agent/perian_agent/example.py | 40 +++++++++++++++++++ examples/perian_agent/requirements.in | 5 +++ 7 files changed, 101 insertions(+), 1 deletion(-) create mode 100644 examples/perian_agent/Dockerfile create mode 100644 examples/perian_agent/README.md create mode 100644 examples/perian_agent/perian_agent/__init__.py create mode 100644 examples/perian_agent/perian_agent/example.py create mode 100644 examples/perian_agent/requirements.in diff --git a/_example_template/Dockerfile b/_example_template/Dockerfile index 65449f624..d56c81ef4 100644 --- a/_example_template/Dockerfile +++ b/_example_template/Dockerfile @@ -1,5 +1,5 @@ FROM python:3.8-slim-buster -LABEL org.opencontainers.image.source https://github.com/flyteorg/flytesnacks +LABEL org.opencontainers.image.source=https://github.com/flyteorg/flytesnacks WORKDIR /root ENV VENV /opt/venv diff --git a/docs/integrations/index.md 
b/docs/integrations/index.md index c4e056c84..d462d64a9 100644 --- a/docs/integrations/index.md +++ b/docs/integrations/index.md @@ -108,6 +108,8 @@ Native backend plugins can be executed without any external service dependencies - Execute tasks using the MemVerge Memory Machine Cloud agent. * - {doc}`OpenAI Batch ` - Submit requests for asynchronous batch processing on OpenAI. +* - {doc}`PERIAN Job Platform agent ` + - Execute tasks on PERIAN Job Platform. * - {doc}`Sensor agent ` - Run sensor jobs in your workflows with the sensor agent. * - {doc}`Snowflake agent ` @@ -235,6 +237,7 @@ ChatGPT agent Databricks agent Memory Machine Cloud agent OpenAI batch agent +PERIAN Job Platform agent Sensor agent Snowflake agent ``` diff --git a/examples/perian_agent/Dockerfile b/examples/perian_agent/Dockerfile new file mode 100644 index 000000000..d56c81ef4 --- /dev/null +++ b/examples/perian_agent/Dockerfile @@ -0,0 +1,31 @@ +FROM python:3.8-slim-buster +LABEL org.opencontainers.image.source=https://github.com/flyteorg/flytesnacks + +WORKDIR /root +ENV VENV /opt/venv +ENV LANG C.UTF-8 +ENV LC_ALL C.UTF-8 +ENV PYTHONPATH /root + +# This is necessary for opencv to work +RUN apt-get update && apt-get install -y libsm6 libxext6 libxrender-dev ffmpeg build-essential curl + +WORKDIR /root + +ENV VENV /opt/venv +# Virtual environment +RUN python3 -m venv ${VENV} +ENV PATH="${VENV}/bin:$PATH" + +# Install Python dependencies +COPY requirements.in /root +RUN pip install -r /root/requirements.in +RUN pip freeze + +# Copy the actual code +COPY . /root + +# This tag is supplied by the build script and will be used to determine the version +# when registering tasks, workflows, and launch plans +ARG tag +ENV FLYTE_INTERNAL_IMAGE $tag diff --git a/examples/perian_agent/README.md b/examples/perian_agent/README.md new file mode 100644 index 000000000..a07769d9d --- /dev/null +++ b/examples/perian_agent/README.md @@ -0,0 +1,21 @@ +```{eval-rst} +.. 
tags:: Cloud, GPU, Integration, Advanced +``` + +(perian_agent)= + +# PERIAN Job Platform Agent + +The PERIAN Flyte Agent enables you to execute Flyte tasks on the [PERIAN Sky Platform](https://perian.io/). PERIAN allows the execution of any task on servers aggregated from multiple cloud providers. + +Example usage: + +```{auto-examples-toc} +example +``` + +To get started with PERIAN, see the [PERIAN documentation](https://perian.io/docs/overview) and the [PERIAN Flyte Agent documentation](https://perian.io/docs/flyte-getting-started). + +## Agent setup + +Consult the [PERIAN Flyte Agent setup guide](https://perian.io/docs/flyte-setup-guide). diff --git a/examples/perian_agent/perian_agent/__init__.py b/examples/perian_agent/perian_agent/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/examples/perian_agent/perian_agent/example.py b/examples/perian_agent/perian_agent/example.py new file mode 100644 index 000000000..a23060be2 --- /dev/null +++ b/examples/perian_agent/perian_agent/example.py @@ -0,0 +1,40 @@ +# %% [markdown] +# (example)= +# # PERIAN agent example usage +# +# This example shows how to use the PERIAN agent to execute tasks on PERIAN Job Platform. + +# %% +from flytekit import ImageSpec, task, workflow +from flytekitplugins.perian_job import PerianConfig + +image_spec = ImageSpec( + name="flyte-test", + registry="my-registry", + python_version="3.11", + apt_packages=["wget", "curl", "git"], + packages=[ + "flytekitplugins-perian-job", + ], +) + + +# %% [markdown] +# `PerianConfig` configures `PerianTask`. Tasks specified with `PerianConfig` will be executed on PERIAN Job Platform. + + +# %% +@task( + container_image=image_spec, + task_config=PerianConfig( + accelerators=1, + accelerator_type="A100", + ), +) +def perian_hello(name: str) -> str: + return f"hello {name}!" 
+ + +@workflow +def my_wf(name: str = "world") -> str: + return perian_hello(name=name) diff --git a/examples/perian_agent/requirements.in b/examples/perian_agent/requirements.in new file mode 100644 index 000000000..b0b2423b0 --- /dev/null +++ b/examples/perian_agent/requirements.in @@ -0,0 +1,5 @@ +flytekit>=1.7.0 +wheel +matplotlib +flytekitplugins-deck-standard +flytekitplugins-perian-job From f12e916f81a4f878e96c94f03bd208f41b73c3d4 Mon Sep 17 00:00:00 2001 From: Sumana Sree Angajala <110307215+sumana-2705@users.noreply.github.com> Date: Thu, 17 Oct 2024 16:23:19 +0530 Subject: [PATCH 17/28] Added examples for tensorflow types in Datatypes and IO section (#1739) * Added examples for tensorflow types in Datatypes and IO section Signed-off-by: sumana sree * Fixed linting errors Signed-off-by: sumana sree * updated tensorflow_type.py file to avoid linting errors Signed-off-by: sumana sree * Apply lint corrections using pre-commit hooks Signed-off-by: sumana sree * added required comments Signed-off-by: sumana sree * fixed error on importing tensorflow Signed-off-by: sumana sree --------- Signed-off-by: sumana sree --- .../data_types_and_io/tensorflow_type.py | 56 +++++++++++++++++++ 1 file changed, 56 insertions(+) create mode 100644 examples/data_types_and_io/data_types_and_io/tensorflow_type.py diff --git a/examples/data_types_and_io/data_types_and_io/tensorflow_type.py b/examples/data_types_and_io/data_types_and_io/tensorflow_type.py new file mode 100644 index 000000000..349f34b67 --- /dev/null +++ b/examples/data_types_and_io/data_types_and_io/tensorflow_type.py @@ -0,0 +1,56 @@ +# Import necessary libraries and modules + +from flytekit import task, workflow +from flytekit.types.directory import TFRecordsDirectory +from flytekit.types.file import TFRecordFile + +custom_image = ImageSpec( + packages=["tensorflow", "tensorflow-datasets", "flytekitplugins-kftensorflow"], + registry="ghcr.io/flyteorg", +) + +if custom_image.is_container(): + import tensorflow as 
tf + + # TensorFlow Model + @task + def train_model() -> tf.keras.Model: + model = tf.keras.Sequential( + [tf.keras.layers.Dense(128, activation="relu"), tf.keras.layers.Dense(10, activation="softmax")] + ) + model.compile(optimizer="adam", loss="sparse_categorical_crossentropy", metrics=["accuracy"]) + return model + + @task + def evaluate_model(model: tf.keras.Model, x: tf.Tensor, y: tf.Tensor) -> float: + loss, accuracy = model.evaluate(x, y) + return accuracy + + @workflow + def training_workflow(x: tf.Tensor, y: tf.Tensor) -> float: + model = train_model() + return evaluate_model(model=model, x=x, y=y) + + # TFRecord Files + @task + def process_tfrecord(file: TFRecordFile) -> int: + count = 0 + for record in tf.data.TFRecordDataset(file): + count += 1 + return count + + @workflow + def tfrecord_workflow(file: TFRecordFile) -> int: + return process_tfrecord(file=file) + + # TFRecord Directories + @task + def process_tfrecords_dir(dir: TFRecordsDirectory) -> int: + count = 0 + for record in tf.data.TFRecordDataset(dir.path): + count += 1 + return count + + @workflow + def tfrecords_dir_workflow(dir: TFRecordsDirectory) -> int: + return process_tfrecords_dir(dir=dir) From c602b8ea6f1ca29ea8b2fb53c3497a0000bab289 Mon Sep 17 00:00:00 2001 From: Shivam Sharma <66767992+10sharmashivam@users.noreply.github.com> Date: Tue, 22 Oct 2024 00:00:33 +0530 Subject: [PATCH 18/28] [DOCS] Troubleshooting Steps for MPI Operator (#1756) MPI troubleshooting steps Signed-off-by: 10sharmashivam <10sharmashivam@gmail.com> --- examples/kfmpi_plugin/README.md | 34 +++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/examples/kfmpi_plugin/README.md b/examples/kfmpi_plugin/README.md index ad23c4729..0a43ff0ba 100644 --- a/examples/kfmpi_plugin/README.md +++ b/examples/kfmpi_plugin/README.md @@ -55,3 +55,37 @@ pyflyte run --remote \ ```{auto-examples-toc} mpi_mnist ``` + +## MPI Plugin Troubleshooting Guide + +This section covers common issues encountered during 
the setup of the MPI operator for distributed training jobs on Flyte. + +**Worker Pods Failing to Start (Insufficient Resources)** + +MPI worker pods may fail to start or exhibit scheduling issues, leading to job timeouts or failures. This often occurs due to resource constraints (CPU, memory, or GPU) in the cluster. + +1. Adjust Resource Requests: +Ensure that each worker pod has sufficient resources. You can adjust the resource requests in your task definition: + +``` + requests=Resources(cpu="", mem="") +``` + +Modify the CPU and memory values according to your cluster's available resources. This helps prevent pod scheduling failures caused by resource constraints. + +2. Check Pod Logs for Errors: +If the worker pods still fail to start, check the logs for any related errors: + +``` + kubectl logs -n +``` + +Look for resource allocation or worker communication errors. + +**Workflow Registration Method Errors (Timeouts or Deadlocks)** + +If your MPI workflow hangs or times out, it may be caused by an incorrect workflow registration method. + +1. Verify Registration Method: + When using a custom image, refer to the Flyte documentation on [Registering workflows](https://docs.flyte.org/en/latest/user_guide/flyte_fundamentals/registering_workflows.html#registration-patterns) to ensure you're following the correct registration method. 
+ From 90184f916e13b0ed7dc58bf1f49f1b48ea950aca Mon Sep 17 00:00:00 2001 From: Nikki Everett Date: Tue, 22 Oct 2024 08:27:44 -0500 Subject: [PATCH 19/28] DOC-664 update flytekit docs links to point to flytekit repo (#1755) * fix flytekit docs link Signed-off-by: nikki everett * use correct formatting Signed-off-by: nikki everett * update flytekit and flytekit-java links Signed-off-by: nikki everett --------- Signed-off-by: nikki everett --- docs/integrations/index.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/integrations/index.md b/docs/integrations/index.md index d462d64a9..7d48d1531 100644 --- a/docs/integrations/index.md +++ b/docs/integrations/index.md @@ -171,9 +171,9 @@ The {ref}`community ` would love to help you build new SDKs. Currentl :header-rows: 0 :widths: 20 30 -* - [flytekit](https://flytekit.readthedocs.io) +* - [flytekit](https://github.com/flyteorg/flytekit) - The Python SDK for Flyte. -* - [flytekit-java](https://github.com/spotify/flytekit-java) +* - [flytekit-java](https://github.com/flyteorg/flytekit-java) - The Java/Scala SDK for Flyte. 
``` @@ -259,8 +259,8 @@ Hive :hidden: :caption: SDKs for writing tasks and workflows -flytekit -flytekit-java +flytekit +flytekit-java ``` From 9aadec205a6e208c62e29f52873fb3d675965a51 Mon Sep 17 00:00:00 2001 From: Eduardo Apolinario <653394+eapolinario@users.noreply.github.com> Date: Tue, 22 Oct 2024 18:59:00 -0400 Subject: [PATCH 20/28] Fix lint warning and import error in data_types_and_io tf example (#1762) * Fix lint warning and import error in data_types_and_io tf example Signed-off-by: Eduardo Apolinario * Remove use of is_container in tensorflow_type.py example Signed-off-by: Eduardo Apolinario * Fix lint warning Signed-off-by: Eduardo Apolinario --------- Signed-off-by: Eduardo Apolinario Co-authored-by: Eduardo Apolinario --- .../data_types_and_io/tensorflow_type.py | 98 ++++++++++--------- examples/data_types_and_io/requirements.in | 1 + examples/kfmpi_plugin/README.md | 1 - 3 files changed, 53 insertions(+), 47 deletions(-) diff --git a/examples/data_types_and_io/data_types_and_io/tensorflow_type.py b/examples/data_types_and_io/data_types_and_io/tensorflow_type.py index 349f34b67..3ec8aea71 100644 --- a/examples/data_types_and_io/data_types_and_io/tensorflow_type.py +++ b/examples/data_types_and_io/data_types_and_io/tensorflow_type.py @@ -1,6 +1,6 @@ # Import necessary libraries and modules -from flytekit import task, workflow +from flytekit import ImageSpec, task, workflow from flytekit.types.directory import TFRecordsDirectory from flytekit.types.file import TFRecordFile @@ -9,48 +9,54 @@ registry="ghcr.io/flyteorg", ) -if custom_image.is_container(): - import tensorflow as tf - - # TensorFlow Model - @task - def train_model() -> tf.keras.Model: - model = tf.keras.Sequential( - [tf.keras.layers.Dense(128, activation="relu"), tf.keras.layers.Dense(10, activation="softmax")] - ) - model.compile(optimizer="adam", loss="sparse_categorical_crossentropy", metrics=["accuracy"]) - return model - - @task - def evaluate_model(model: tf.keras.Model, x: 
tf.Tensor, y: tf.Tensor) -> float: - loss, accuracy = model.evaluate(x, y) - return accuracy - - @workflow - def training_workflow(x: tf.Tensor, y: tf.Tensor) -> float: - model = train_model() - return evaluate_model(model=model, x=x, y=y) - - # TFRecord Files - @task - def process_tfrecord(file: TFRecordFile) -> int: - count = 0 - for record in tf.data.TFRecordDataset(file): - count += 1 - return count - - @workflow - def tfrecord_workflow(file: TFRecordFile) -> int: - return process_tfrecord(file=file) - - # TFRecord Directories - @task - def process_tfrecords_dir(dir: TFRecordsDirectory) -> int: - count = 0 - for record in tf.data.TFRecordDataset(dir.path): - count += 1 - return count - - @workflow - def tfrecords_dir_workflow(dir: TFRecordsDirectory) -> int: - return process_tfrecords_dir(dir=dir) +import tensorflow as tf + + +# TensorFlow Model +@task +def train_model() -> tf.keras.Model: + model = tf.keras.Sequential( + [tf.keras.layers.Dense(128, activation="relu"), tf.keras.layers.Dense(10, activation="softmax")] + ) + model.compile(optimizer="adam", loss="sparse_categorical_crossentropy", metrics=["accuracy"]) + return model + + +@task +def evaluate_model(model: tf.keras.Model, x: tf.Tensor, y: tf.Tensor) -> float: + loss, accuracy = model.evaluate(x, y) + return accuracy + + +@workflow +def training_workflow(x: tf.Tensor, y: tf.Tensor) -> float: + model = train_model() + return evaluate_model(model=model, x=x, y=y) + + +# TFRecord Files +@task +def process_tfrecord(file: TFRecordFile) -> int: + count = 0 + for record in tf.data.TFRecordDataset(file): + count += 1 + return count + + +@workflow +def tfrecord_workflow(file: TFRecordFile) -> int: + return process_tfrecord(file=file) + + +# TFRecord Directories +@task +def process_tfrecords_dir(dir: TFRecordsDirectory) -> int: + count = 0 + for record in tf.data.TFRecordDataset(dir.path): + count += 1 + return count + + +@workflow +def tfrecords_dir_workflow(dir: TFRecordsDirectory) -> int: + return 
process_tfrecords_dir(dir=dir) diff --git a/examples/data_types_and_io/requirements.in b/examples/data_types_and_io/requirements.in index 79bd303e5..2bcce8b12 100644 --- a/examples/data_types_and_io/requirements.in +++ b/examples/data_types_and_io/requirements.in @@ -1,4 +1,5 @@ pandas torch tabulate +tensorflow pyarrow diff --git a/examples/kfmpi_plugin/README.md b/examples/kfmpi_plugin/README.md index 0a43ff0ba..1eeeac68b 100644 --- a/examples/kfmpi_plugin/README.md +++ b/examples/kfmpi_plugin/README.md @@ -88,4 +88,3 @@ If your MPI workflow hangs or times out, it may be caused by an incorrect workfl 1. Verify Registration Method: When using a custom image, refer to the Flyte documentation on [Registering workflows](https://docs.flyte.org/en/latest/user_guide/flyte_fundamentals/registering_workflows.html#registration-patterns) to ensure you're following the correct registration method. - From 135ae99fdec79015fc4c465976a65ca5190957c3 Mon Sep 17 00:00:00 2001 From: Yenting Chen <42114946+DenChenn@users.noreply.github.com> Date: Wed, 23 Oct 2024 11:48:27 +0800 Subject: [PATCH 21/28] Add example for file streaming (#1759) Add example for directory streaming Signed-off-by: DenChenn --- .../data_types_and_io/file_streaming.py | 44 +++++++++++++++++++ 1 file changed, 44 insertions(+) create mode 100644 examples/data_types_and_io/data_types_and_io/file_streaming.py diff --git a/examples/data_types_and_io/data_types_and_io/file_streaming.py b/examples/data_types_and_io/data_types_and_io/file_streaming.py new file mode 100644 index 000000000..0e9a84952 --- /dev/null +++ b/examples/data_types_and_io/data_types_and_io/file_streaming.py @@ -0,0 +1,44 @@ +from flytekit import task, workflow +from flytekit.types.file import FlyteFile +from flytekit.types.directory import FlyteDirectory +import pandas as pd +import os + + +@task() +def remove_some_rows(ff: FlyteFile) -> FlyteFile: + """ + Remove the rows that the value of city is 'Seattle'. 
+ This is an example with streaming support. + """ + new_file = FlyteFile.new_remote_file("data_without_seattle.csv") + with ff.open("r") as r: + with new_file.open("w") as w: + df = pd.read_csv(r) + df = df[df["City"] != "Seattle"] + df.to_csv(w, index=False) + return new_file + + +@task +def process_folder(fd: FlyteDirectory) -> FlyteDirectory: + out_fd = FlyteDirectory.new_remote("folder-copy") + for base, x in fd.crawl(): + src = str(os.path.join(base, x)) + out_file = out_fd.new_file(x) + with FlyteFile(src).open("rb") as f: + with out_file.open("wb") as o: + o.write(f.read()) + # The output path will be s3://my-s3-bucket/data/77/--0/folder-copy + return out_fd + + +@workflow() +def wf(): + remove_some_rows(ff=FlyteFile("s3://custom-bucket/data.csv")) + process_folder(fd=FlyteDirectory("s3://my-s3-bucket/folder")) + return + + +if __name__ == "__main__": + print(f"Running wf() {wf()}") From 0b350cd3b42fe57ea06880a374d4797d54b9b3ac Mon Sep 17 00:00:00 2001 From: Alex Wu <66259759+popojk@users.noreply.github.com> Date: Wed, 23 Oct 2024 11:49:24 +0800 Subject: [PATCH 22/28] add ContainerTask cache example code (#1758) Signed-off-by: Alex Wu --- .../customizing_dependencies/raw_container.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/examples/customizing_dependencies/customizing_dependencies/raw_container.py b/examples/customizing_dependencies/customizing_dependencies/raw_container.py index e8dc9a5ed..9d986da56 100644 --- a/examples/customizing_dependencies/customizing_dependencies/raw_container.py +++ b/examples/customizing_dependencies/customizing_dependencies/raw_container.py @@ -1,6 +1,7 @@ import logging from flytekit import ContainerTask, kwtypes, task, workflow +from flytekit.core.base_task import TaskMetadata logger = logging.getLogger(__file__) @@ -25,6 +26,7 @@ "{{.inputs.b}}", "/var/outputs", ], + metadata=TaskMetadata(cache=True, cache_version="1.0"), ) calculate_ellipse_area_python = ContainerTask( @@ -41,6 +43,7 @@ "{{.inputs.b}}", 
"/var/outputs", ], + metadata=TaskMetadata(cache=True, cache_version="1.0"), ) calculate_ellipse_area_r = ContainerTask( @@ -58,6 +61,7 @@ "{{.inputs.b}}", "/var/outputs", ], + metadata=TaskMetadata(cache=True, cache_version="1.0"), ) calculate_ellipse_area_haskell = ContainerTask( @@ -73,6 +77,7 @@ "{{.inputs.b}}", "/var/outputs", ], + metadata=TaskMetadata(cache=True, cache_version="1.0"), ) calculate_ellipse_area_julia = ContainerTask( @@ -89,6 +94,7 @@ "{{.inputs.b}}", "/var/outputs", ], + metadata=TaskMetadata(cache=True, cache_version="1.0"), ) From 61438bd4b20b5e128fd5ca82e654dc8118a33a12 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=B1=9F=E5=AE=B6=E7=91=8B?= <36886416+JiangJiaWei1103@users.noreply.github.com> Date: Wed, 23 Oct 2024 05:49:40 +0200 Subject: [PATCH 23/28] fix: Recover the Expected Behaviors of Example Workflows (#1760) 1. Demonstrate the logic of the workflow `shape_properties_accept_conditional_output`. 2. Swap the descriptions of expected outputs of the workflow `consume_task_output`. 
Signed-off-by: JiaWei Jiang --- .../advanced_composition/conditional.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/examples/advanced_composition/advanced_composition/conditional.py b/examples/advanced_composition/advanced_composition/conditional.py index 7c314162b..f29c0770a 100644 --- a/examples/advanced_composition/advanced_composition/conditional.py +++ b/examples/advanced_composition/advanced_composition/conditional.py @@ -79,7 +79,10 @@ def shape_properties_accept_conditional_output(radius: float) -> float: if __name__ == "__main__": - print(f"Circumference of circle x Area of circle (radius={radius_small}): {shape_properties(radius=5.0)}") + radius_small = 0.5 + print( + f"Circumference of circle (radius={radius_small}) x Area of circle (radius={calculate_circle_circumference(radius=radius_small)}): {shape_properties_accept_conditional_output(radius=radius_small)}" + ) # Using the output of a previous task in a conditional @@ -213,8 +216,10 @@ def noop_in_conditional(radius: float, seed: int = 5) -> float: if __name__ == "__main__": default_seed_output = consume_task_output(radius=0.4) print( - f"Executing consume_task_output(0.4) with default seed=5. Expected output: calculate_circle_circumference => {default_seed_output}" + f"Executing consume_task_output(0.4) with default seed=5. Expected output: calculate_circle_area => {default_seed_output}" ) custom_seed_output = consume_task_output(radius=0.4, seed=7) - print(f"Executing consume_task_output(0.4, seed=7). Expected output: calculate_circle_area => {custom_seed_output}") + print( + f"Executing consume_task_output(0.4, seed=7). 
Expected output: calculate_circle_circumference => {custom_seed_output}" + ) From 8ef422017b206b4dcff06cf56677f7793dad3caa Mon Sep 17 00:00:00 2001 From: Eduardo Apolinario <653394+eapolinario@users.noreply.github.com> Date: Wed, 23 Oct 2024 12:58:14 -0400 Subject: [PATCH 24/28] Allow csv files (#1763) * Allow csv files Signed-off-by: Eduardo Apolinario * Fix lint warning Signed-off-by: Eduardo Apolinario --------- Signed-off-by: Eduardo Apolinario Co-authored-by: Eduardo Apolinario --- .gitignore | 1 - .../data_types_and_io/data_types_and_io/file_streaming.py | 7 ++++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.gitignore b/.gitignore index dcb73da03..cd4020676 100644 --- a/.gitignore +++ b/.gitignore @@ -3,7 +3,6 @@ __pycache__/ .idea .jpg .ipynb_checkpoints/ -*.csv *.dat .DS_Store gen_modules diff --git a/examples/data_types_and_io/data_types_and_io/file_streaming.py b/examples/data_types_and_io/data_types_and_io/file_streaming.py index 0e9a84952..73e1add2a 100644 --- a/examples/data_types_and_io/data_types_and_io/file_streaming.py +++ b/examples/data_types_and_io/data_types_and_io/file_streaming.py @@ -1,8 +1,9 @@ +import os + +import pandas as pd from flytekit import task, workflow -from flytekit.types.file import FlyteFile from flytekit.types.directory import FlyteDirectory -import pandas as pd -import os +from flytekit.types.file import FlyteFile @task() From 2ab1024356589054fc91dceaa36964465307e576 Mon Sep 17 00:00:00 2001 From: Shivam Sharma <66767992+10sharmashivam@users.noreply.github.com> Date: Wed, 30 Oct 2024 19:55:48 +0530 Subject: [PATCH 25/28] Test_data added for #4764 (#1764) * Test_data added Signed-off-by: 10sharmashivam <10sharmashivam@gmail.com> * Extra Line at end removed Signed-off-by: 10sharmashivam <10sharmashivam@gmail.com> --------- Signed-off-by: 10sharmashivam <10sharmashivam@gmail.com> --- .../data_types_and_io/file.py | 8 +- .../data_types_and_io/test_data/biostats.csv | 19 ++ 
.../data_types_and_io/test_data/faithful.csv | 273 ++++++++++++++++++ 3 files changed, 296 insertions(+), 4 deletions(-) create mode 100644 examples/data_types_and_io/test_data/biostats.csv create mode 100644 examples/data_types_and_io/test_data/faithful.csv diff --git a/examples/data_types_and_io/data_types_and_io/file.py b/examples/data_types_and_io/data_types_and_io/file.py index ede0fd7ae..74d83ae66 100644 --- a/examples/data_types_and_io/data_types_and_io/file.py +++ b/examples/data_types_and_io/data_types_and_io/file.py @@ -37,17 +37,17 @@ def normalize_columns( normalized_data[colname] = [(x - mean) / std for x in values] # write to local path - out_path = Path(flytekit.current_context().working_directory) / f"normalized-{Path(csv_url.path).stem}.csv" - with out_path.open(mode="w") as output_file: + out_path = str(Path(flytekit.current_context().working_directory) / f"normalized-{Path(csv_url.path).stem}.csv") + with open(out_path, mode="w") as output_file: writer = csv.DictWriter(output_file, fieldnames=columns_to_normalize) writer.writeheader() for row in zip(*normalized_data.values()): writer.writerow({k: row[i] for i, k in enumerate(columns_to_normalize)}) if output_location: - return FlyteFile(path=out_path, remote_path=output_location) + return FlyteFile(path=str(out_path), remote_path=output_location) else: - return FlyteFile(path=out_path) + return FlyteFile(path=str(out_path)) # Define a workflow. 
The `normalize_csv_files` workflow has an `output_location` argument which is passed diff --git a/examples/data_types_and_io/test_data/biostats.csv b/examples/data_types_and_io/test_data/biostats.csv new file mode 100644 index 000000000..b09efca0c --- /dev/null +++ b/examples/data_types_and_io/test_data/biostats.csv @@ -0,0 +1,19 @@ +"Name", "Sex", "Age", "Height (in)", "Weight (lbs)" +"Alex", "M", 41, 74, 170 +"Bert", "M", 42, 68, 166 +"Carl", "M", 32, 70, 155 +"Dave", "M", 39, 72, 167 +"Elly", "F", 30, 66, 124 +"Fran", "F", 33, 66, 115 +"Gwen", "F", 26, 64, 121 +"Hank", "M", 30, 71, 158 +"Ivan", "M", 53, 72, 175 +"Jake", "M", 32, 69, 143 +"Kate", "F", 47, 69, 139 +"Luke", "M", 34, 72, 163 +"Myra", "F", 23, 62, 98 +"Neil", "M", 36, 75, 160 +"Omar", "M", 38, 70, 145 +"Page", "F", 31, 67, 135 +"Quin", "M", 29, 71, 176 +"Ruth", "F", 28, 65, 131 diff --git a/examples/data_types_and_io/test_data/faithful.csv b/examples/data_types_and_io/test_data/faithful.csv new file mode 100644 index 000000000..a11030a50 --- /dev/null +++ b/examples/data_types_and_io/test_data/faithful.csv @@ -0,0 +1,273 @@ +"Index", "Eruption length (mins)","Eruption wait (mins)" + 1, 3.600, 79 + 2, 1.800, 54 + 3, 3.333, 74 + 4, 2.283, 62 + 5, 4.533, 85 + 6, 2.883, 55 + 7, 4.700, 88 + 8, 3.600, 85 + 9, 1.950, 51 + 10, 4.350, 85 + 11, 1.833, 54 + 12, 3.917, 84 + 13, 4.200, 78 + 14, 1.750, 47 + 15, 4.700, 83 + 16, 2.167, 52 + 17, 1.750, 62 + 18, 4.800, 84 + 19, 1.600, 52 + 20, 4.250, 79 + 21, 1.800, 51 + 22, 1.750, 47 + 23, 3.450, 78 + 24, 3.067, 69 + 25, 4.533, 74 + 26, 3.600, 83 + 27, 1.967, 55 + 28, 4.083, 76 + 29, 3.850, 78 + 30, 4.433, 79 + 31, 4.300, 73 + 32, 4.467, 77 + 33, 3.367, 66 + 34, 4.033, 80 + 35, 3.833, 74 + 36, 2.017, 52 + 37, 1.867, 48 + 38, 4.833, 80 + 39, 1.833, 59 + 40, 4.783, 90 + 41, 4.350, 80 + 42, 1.883, 58 + 43, 4.567, 84 + 44, 1.750, 58 + 45, 4.533, 73 + 46, 3.317, 83 + 47, 3.833, 64 + 48, 2.100, 53 + 49, 4.633, 82 + 50, 2.000, 59 + 51, 4.800, 75 + 52, 4.716, 90 + 53, 1.833, 
54 + 54, 4.833, 80 + 55, 1.733, 54 + 56, 4.883, 83 + 57, 3.717, 71 + 58, 1.667, 64 + 59, 4.567, 77 + 60, 4.317, 81 + 61, 2.233, 59 + 62, 4.500, 84 + 63, 1.750, 48 + 64, 4.800, 82 + 65, 1.817, 60 + 66, 4.400, 92 + 67, 4.167, 78 + 68, 4.700, 78 + 69, 2.067, 65 + 70, 4.700, 73 + 71, 4.033, 82 + 72, 1.967, 56 + 73, 4.500, 79 + 74, 4.000, 71 + 75, 1.983, 62 + 76, 5.067, 76 + 77, 2.017, 60 + 78, 4.567, 78 + 79, 3.883, 76 + 80, 3.600, 83 + 81, 4.133, 75 + 82, 4.333, 82 + 83, 4.100, 70 + 84, 2.633, 65 + 85, 4.067, 73 + 86, 4.933, 88 + 87, 3.950, 76 + 88, 4.517, 80 + 89, 2.167, 48 + 90, 4.000, 86 + 91, 2.200, 60 + 92, 4.333, 90 + 93, 1.867, 50 + 94, 4.817, 78 + 95, 1.833, 63 + 96, 4.300, 72 + 97, 4.667, 84 + 98, 3.750, 75 + 99, 1.867, 51 +100, 4.900, 82 +101, 2.483, 62 +102, 4.367, 88 +103, 2.100, 49 +104, 4.500, 83 +105, 4.050, 81 +106, 1.867, 47 +107, 4.700, 84 +108, 1.783, 52 +109, 4.850, 86 +110, 3.683, 81 +111, 4.733, 75 +112, 2.300, 59 +113, 4.900, 89 +114, 4.417, 79 +115, 1.700, 59 +116, 4.633, 81 +117, 2.317, 50 +118, 4.600, 85 +119, 1.817, 59 +120, 4.417, 87 +121, 2.617, 53 +122, 4.067, 69 +123, 4.250, 77 +124, 1.967, 56 +125, 4.600, 88 +126, 3.767, 81 +127, 1.917, 45 +128, 4.500, 82 +129, 2.267, 55 +130, 4.650, 90 +131, 1.867, 45 +132, 4.167, 83 +133, 2.800, 56 +134, 4.333, 89 +135, 1.833, 46 +136, 4.383, 82 +137, 1.883, 51 +138, 4.933, 86 +139, 2.033, 53 +140, 3.733, 79 +141, 4.233, 81 +142, 2.233, 60 +143, 4.533, 82 +144, 4.817, 77 +145, 4.333, 76 +146, 1.983, 59 +147, 4.633, 80 +148, 2.017, 49 +149, 5.100, 96 +150, 1.800, 53 +151, 5.033, 77 +152, 4.000, 77 +153, 2.400, 65 +154, 4.600, 81 +155, 3.567, 71 +156, 4.000, 70 +157, 4.500, 81 +158, 4.083, 93 +159, 1.800, 53 +160, 3.967, 89 +161, 2.200, 45 +162, 4.150, 86 +163, 2.000, 58 +164, 3.833, 78 +165, 3.500, 66 +166, 4.583, 76 +167, 2.367, 63 +168, 5.000, 88 +169, 1.933, 52 +170, 4.617, 93 +171, 1.917, 49 +172, 2.083, 57 +173, 4.583, 77 +174, 3.333, 68 +175, 4.167, 81 +176, 4.333, 81 +177, 4.500, 73 +178, 2.417, 
50 +179, 4.000, 85 +180, 4.167, 74 +181, 1.883, 55 +182, 4.583, 77 +183, 4.250, 83 +184, 3.767, 83 +185, 2.033, 51 +186, 4.433, 78 +187, 4.083, 84 +188, 1.833, 46 +189, 4.417, 83 +190, 2.183, 55 +191, 4.800, 81 +192, 1.833, 57 +193, 4.800, 76 +194, 4.100, 84 +195, 3.966, 77 +196, 4.233, 81 +197, 3.500, 87 +198, 4.366, 77 +199, 2.250, 51 +200, 4.667, 78 +201, 2.100, 60 +202, 4.350, 82 +203, 4.133, 91 +204, 1.867, 53 +205, 4.600, 78 +206, 1.783, 46 +207, 4.367, 77 +208, 3.850, 84 +209, 1.933, 49 +210, 4.500, 83 +211, 2.383, 71 +212, 4.700, 80 +213, 1.867, 49 +214, 3.833, 75 +215, 3.417, 64 +216, 4.233, 76 +217, 2.400, 53 +218, 4.800, 94 +219, 2.000, 55 +220, 4.150, 76 +221, 1.867, 50 +222, 4.267, 82 +223, 1.750, 54 +224, 4.483, 75 +225, 4.000, 78 +226, 4.117, 79 +227, 4.083, 78 +228, 4.267, 78 +229, 3.917, 70 +230, 4.550, 79 +231, 4.083, 70 +232, 2.417, 54 +233, 4.183, 86 +234, 2.217, 50 +235, 4.450, 90 +236, 1.883, 54 +237, 1.850, 54 +238, 4.283, 77 +239, 3.950, 79 +240, 2.333, 64 +241, 4.150, 75 +242, 2.350, 47 +243, 4.933, 86 +244, 2.900, 63 +245, 4.583, 85 +246, 3.833, 82 +247, 2.083, 57 +248, 4.367, 82 +249, 2.133, 67 +250, 4.350, 74 +251, 2.200, 54 +252, 4.450, 83 +253, 3.567, 73 +254, 4.500, 73 +255, 4.150, 88 +256, 3.817, 80 +257, 3.917, 71 +258, 4.450, 83 +259, 2.000, 56 +260, 4.283, 79 +261, 4.767, 78 +262, 4.533, 84 +263, 1.850, 58 +264, 4.250, 83 +265, 1.983, 43 +266, 2.250, 60 +267, 4.750, 75 +268, 4.117, 81 +269, 2.150, 46 +270, 4.417, 90 +271, 1.817, 46 +272, 4.467, 74 From 9af4cac647f7f66b1dd82d6e1a21397780efbcaf Mon Sep 17 00:00:00 2001 From: Shivam Sharma <66767992+10sharmashivam@users.noreply.github.com> Date: Thu, 31 Oct 2024 01:36:55 +0530 Subject: [PATCH 26/28] Broken PO tests corrected (#1766) Signed-off-by: 10sharmashivam <10sharmashivam@gmail.com> --- examples/data_types_and_io/data_types_and_io/file.py | 4 ++-- examples/data_types_and_io/data_types_and_io/folder.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git 
a/examples/data_types_and_io/data_types_and_io/file.py b/examples/data_types_and_io/data_types_and_io/file.py index 74d83ae66..1c89c1e18 100644 --- a/examples/data_types_and_io/data_types_and_io/file.py +++ b/examples/data_types_and_io/data_types_and_io/file.py @@ -72,12 +72,12 @@ def normalize_csv_file( if __name__ == "__main__": default_files = [ ( - "https://people.sc.fsu.edu/~jburkardt/data/csv/biostats.csv", + "https://raw.githubusercontent.com/flyteorg/flytesnacks/refs/heads/master/examples/data_types_and_io/test_data/biostats.csv", ["Name", "Sex", "Age", "Heights (in)", "Weight (lbs)"], ["Age"], ), ( - "https://people.sc.fsu.edu/~jburkardt/data/csv/faithful.csv", + "https://raw.githubusercontent.com/flyteorg/flytesnacks/refs/heads/master/examples/data_types_and_io/test_data/faithful.csv", ["Index", "Eruption length (mins)", "Eruption wait (mins)"], ["Eruption length (mins)"], ), diff --git a/examples/data_types_and_io/data_types_and_io/folder.py b/examples/data_types_and_io/data_types_and_io/folder.py index 46b0e5e5a..4b79e3233 100644 --- a/examples/data_types_and_io/data_types_and_io/folder.py +++ b/examples/data_types_and_io/data_types_and_io/folder.py @@ -93,8 +93,8 @@ def download_and_normalize_csv_files( # Run the workflow locally if __name__ == "__main__": csv_urls = [ - "https://people.sc.fsu.edu/~jburkardt/data/csv/biostats.csv", - "https://people.sc.fsu.edu/~jburkardt/data/csv/faithful.csv", + "https://raw.githubusercontent.com/flyteorg/flytesnacks/refs/heads/master/examples/data_types_and_io/test_data/biostats.csv", + "https://raw.githubusercontent.com/flyteorg/flytesnacks/refs/heads/master/examples/data_types_and_io/test_data/faithful.csv", ] columns_metadata = [ ["Name", "Sex", "Age", "Heights (in)", "Weight (lbs)"], From fbe246177d321e0491b652084c1c3a9c0cb0d35c Mon Sep 17 00:00:00 2001 From: Eduardo Apolinario <653394+eapolinario@users.noreply.github.com> Date: Fri, 8 Nov 2024 15:15:56 -0500 Subject: [PATCH 27/28] Update README.md (#1769) 
Signed-off-by: Eduardo Apolinario <653394+eapolinario@users.noreply.github.com> --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index f6ad212d4..5ccbe2ed7 100644 --- a/README.md +++ b/README.md @@ -34,6 +34,7 @@ The [Tutorials](https://docs.flyte.org/en/latest/flytesnacks/tutorials/index.htm the [Integrations](https://docs.flyte.org/en/latest/flytesnacks/integrations/index.html) section demonstrates how to use Flyte with other tools and frameworks. + > Flytesnacks currently has all examples in Python (Flytekit Python SDK). From 72aa5b235eee9335814f986e8c6cda5018af9b74 Mon Sep 17 00:00:00 2001 From: "Thomas J. Fan" Date: Wed, 13 Nov 2024 17:21:31 -0500 Subject: [PATCH 28/28] Adds comet-ml plugin example (#1702) Signed-off-by: Thomas J. Fan --- docs/integrations/index.md | 3 + examples/comet_ml_plugin/Dockerfile | 27 +++ examples/comet_ml_plugin/README.md | 36 ++++ .../comet_ml_plugin/__init__.py | 0 .../comet_ml_plugin/comet_ml_example.py | 157 ++++++++++++++++++ examples/comet_ml_plugin/requirements.in | 1 + 6 files changed, 224 insertions(+) create mode 100644 examples/comet_ml_plugin/Dockerfile create mode 100644 examples/comet_ml_plugin/README.md create mode 100644 examples/comet_ml_plugin/comet_ml_plugin/__init__.py create mode 100644 examples/comet_ml_plugin/comet_ml_plugin/comet_ml_example.py create mode 100644 examples/comet_ml_plugin/requirements.in diff --git a/docs/integrations/index.md b/docs/integrations/index.md index 7d48d1531..c89d0e29d 100644 --- a/docs/integrations/index.md +++ b/docs/integrations/index.md @@ -18,6 +18,8 @@ Flytekit functionality. For comparison, these plugins can be thought of like :header-rows: 0 :widths: 20 30 +* - {doc}`Comet ` + - `comet-ml`: Comet’s machine learning platform. * - {doc}`DBT ` - Run and test your `dbt` pipelines in Flyte. * - {doc}`Dolt ` @@ -195,6 +197,7 @@ constructs natively within other orchestration tools. 
:hidden: :caption: Flytekit plugins +Comet DBT Dolt DuckDB diff --git a/examples/comet_ml_plugin/Dockerfile b/examples/comet_ml_plugin/Dockerfile new file mode 100644 index 000000000..4e04f77bd --- /dev/null +++ b/examples/comet_ml_plugin/Dockerfile @@ -0,0 +1,27 @@ +FROM python:3.11-slim-bookworm +LABEL org.opencontainers.image.source https://github.com/flyteorg/flytesnacks + +WORKDIR /root +ENV VENV /opt/venv +ENV LANG C.UTF-8 +ENV LC_ALL C.UTF-8 +ENV PYTHONPATH /root + +WORKDIR /root + +ENV VENV /opt/venv +# Virtual environment +RUN python3 -m venv ${VENV} +ENV PATH="${VENV}/bin:$PATH" + +# Install Python dependencies +COPY requirements.in /root +RUN pip install -r /root/requirements.in + +# Copy the actual code +COPY . /root + +# This tag is supplied by the build script and will be used to determine the version +# when registering tasks, workflows, and launch plans +ARG tag +ENV FLYTE_INTERNAL_IMAGE $tag diff --git a/examples/comet_ml_plugin/README.md b/examples/comet_ml_plugin/README.md new file mode 100644 index 000000000..40a962a52 --- /dev/null +++ b/examples/comet_ml_plugin/README.md @@ -0,0 +1,36 @@ +(comet_ml)= + +# Comet ML + +```{eval-rst} +.. tags:: Integration, Data, Metrics, Intermediate +``` + +Comet’s machine learning platform integrates with your existing infrastructure and tools so you can manage, visualize, and optimize models from training runs to production monitoring. This plugin integrates Flyte with Comet by configuring links between the two platforms. + +To install the plugin, run: + +```bash +pip install flytekitplugins-comet-ml +``` + +Comet requires an API key to authenticate with their platform. In the above example, a secret is created using +[Flyte's Secrets manager](https://docs.flyte.org/en/latest/user_guide/productionizing/secrets.html). 
+ +To enable linking from the Flyte side panel to Comet.ml, add the following to Flyte's configuration: + +```yaml +plugins: + logs: + dynamic-log-links: + - comet-ml-execution-id: + displayName: Comet + templateUris: "{{ .taskConfig.host }}/{{ .taskConfig.workspace }}/{{ .taskConfig.project_name }}/{{ .executionName }}{{ .nodeId }}{{ .taskRetryAttempt }}{{ .taskConfig.link_suffix }}" + - comet-ml-custom-id: + displayName: Comet + templateUris: "{{ .taskConfig.host }}/{{ .taskConfig.workspace }}/{{ .taskConfig.project_name }}/{{ .taskConfig.experiment_key }}" +``` + +```{auto-examples-toc} +comet_ml_example +``` diff --git a/examples/comet_ml_plugin/comet_ml_plugin/__init__.py b/examples/comet_ml_plugin/comet_ml_plugin/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/examples/comet_ml_plugin/comet_ml_plugin/comet_ml_example.py b/examples/comet_ml_plugin/comet_ml_plugin/comet_ml_example.py new file mode 100644 index 000000000..af5c92e71 --- /dev/null +++ b/examples/comet_ml_plugin/comet_ml_plugin/comet_ml_example.py @@ -0,0 +1,157 @@ +# %% [markdown] +# (comet_ml_example)= +# +# # Comet Example +# Comet’s machine learning platform integrates with your existing infrastructure and +# tools so you can manage, visualize, and optimize models from training runs to +# production monitoring. This plugin integrates Flyte with Comet by configuring +# links between the two platforms. +import os +import os.path + +from flytekit import ( + ImageSpec, + Secret, + current_context, + task, + workflow, +) +from flytekit.types.directory import FlyteDirectory +from flytekitplugins.comet_ml import comet_ml_login + +# %% [markdown] +# First, we specify the project and workspace that we will use with Comet's platform. +# Please update `PROJECT_NAME` and `WORKSPACE` to the values associated with your account. +# %% +PROJECT_NAME = "flytekit-comet-ml-v1" +WORKSPACE = "thomas-unionai" + +# %% [markdown] +# Comet requires an API key to authenticate with its platform. 
In the example below, +# the secret is created using +# [Flyte's Secrets manager](https://docs.flyte.org/en/latest/user_guide/productionizing/secrets.html). +# %% +secret = Secret(key="comet-ml-key", group="comet-ml-group") + +# %% [markdown] +# Next, we use `ImageSpec` to construct a container that contains the dependencies for this +# task: +# %% + +REGISTRY = os.getenv("REGISTRY", "localhost:30000") +image = ImageSpec( + name="comet-ml", + packages=[ + "torch==2.3.1", + "comet-ml==3.43.2", + "lightning==2.3.0", + "flytekitplugins-comet-ml", + "torchvision", + ], + builder="default", + registry=REGISTRY, +) + + +# %% [markdown] +# Here, we use a Flyte task to download the dataset and cache it: +# %% +@task(cache=True, cache_version="2", container_image=image) +def get_dataset() -> FlyteDirectory: + from torchvision.datasets import MNIST + + ctx = current_context() + dataset_dir = os.path.join(ctx.working_directory, "datasetset") + os.makedirs(dataset_dir, exist_ok=True) + + # Download training and evaluation datasets + MNIST(dataset_dir, train=True, download=True) + MNIST(dataset_dir, train=False, download=True) + + return dataset_dir + + +# %% +# The `comet_ml_login` decorator calls `comet_ml.init` and configures it to use Flyte's +# execution id as Comet's experiment key. The body of the task is PyTorch Lightning +# training code, where we pass `CometLogger` into the `Trainer`'s `logger`. 
+@task( + secret_requests=[secret], + container_image=image, +) +@comet_ml_login( + project_name=PROJECT_NAME, + workspace=WORKSPACE, + secret=secret, +) +def train_lightning(dataset: FlyteDirectory, hidden_layer_size: int): + import pytorch_lightning as pl + import torch + import torch.nn.functional as F + from pytorch_lightning import Trainer + from pytorch_lightning.loggers import CometLogger + from torch.utils.data import DataLoader + from torchvision import transforms + from torchvision.datasets import MNIST + + class Model(pl.LightningModule): + def __init__(self, layer_size=784, hidden_layer_size=256): + super().__init__() + self.save_hyperparameters() + self.layers = torch.nn.Sequential( + torch.nn.Linear(layer_size, hidden_layer_size), + torch.nn.Linear(hidden_layer_size, 10), + ) + + def forward(self, x): + return torch.relu(self.layers(x.view(x.size(0), -1))) + + def training_step(self, batch, batch_nb): + x, y = batch + loss = F.cross_entropy(self(x), y) + self.logger.log_metrics({"train_loss": loss}, step=batch_nb) + return loss + + def validation_step(self, batch, batch_nb): + x, y = batch + y_hat = self.forward(x) + loss = F.cross_entropy(y_hat, y) + self.logger.log_metrics({"val_loss": loss}, step=batch_nb) + return loss + + def configure_optimizers(self): + return torch.optim.Adam(self.parameters(), lr=0.02) + + dataset.download() + train_ds = MNIST(dataset, train=True, download=False, transform=transforms.ToTensor()) + eval_ds = MNIST(dataset, train=False, download=False, transform=transforms.ToTensor()) + train_loader = DataLoader(train_ds, batch_size=32) + eval_loader = DataLoader(eval_ds, batch_size=32) + + comet_logger = CometLogger() + comet_logger.log_hyperparams({"batch_size": 32}) + + model = Model(hidden_layer_size=hidden_layer_size) + trainer = Trainer(max_epochs=1, fast_dev_run=True, logger=comet_logger) + trainer.fit(model, train_loader, eval_loader) + + +@workflow +def main(hidden_layer_size: int = 32): + dataset = get_dataset() + 
train_lightning(dataset=dataset, hidden_layer_size=hidden_layer_size) + + +# %% [markdown] +# To enable dynamic log links, add the following plugin settings to Flyte's configuration file: +# ```yaml +# plugins: +# logs: +# dynamic-log-links: +# - comet-ml-execution-id: +# displayName: Comet +# templateUris: "{{ .taskConfig.host }}/{{ .taskConfig.workspace }}/{{ .taskConfig.project_name }}/{{ .executionName }}{{ .nodeId }}{{ .taskRetryAttempt }}{{ .taskConfig.link_suffix }}" +# - comet-ml-custom-id: +# displayName: Comet +# templateUris: "{{ .taskConfig.host }}/{{ .taskConfig.workspace }}/{{ .taskConfig.project_name }}/{{ .taskConfig.experiment_key }}" +# ``` diff --git a/examples/comet_ml_plugin/requirements.in b/examples/comet_ml_plugin/requirements.in new file mode 100644 index 000000000..bfadcb009 --- /dev/null +++ b/examples/comet_ml_plugin/requirements.in @@ -0,0 +1 @@ +flytekitplugins-comet-ml