From b003423eb0df37cb938f10ee088f144196e2067e Mon Sep 17 00:00:00 2001
From: "Thomas J. Fan"
Date: Mon, 12 Aug 2024 11:39:19 -0400
Subject: [PATCH 01/35] Fix monodocs build error (#1720)
Signed-off-by: Thomas J. Fan
---
.github/workflows/monodocs_build.yml | 16 ++++++++++++++++
1 file changed, 16 insertions(+)
diff --git a/.github/workflows/monodocs_build.yml b/.github/workflows/monodocs_build.yml
index 66f4e1a4e..7460e7bcf 100644
--- a/.github/workflows/monodocs_build.yml
+++ b/.github/workflows/monodocs_build.yml
@@ -29,6 +29,22 @@ jobs:
with:
auto-update-conda: true
python-version: 3.9
+ - name: Remove large directories
+ run: |
+ sudo rm -rf \
+ "$AGENT_TOOLSDIRECTORY" \
+ /opt/google/chrome \
+ /opt/microsoft/msedge \
+ /opt/microsoft/powershell \
+ /opt/pipx \
+ /usr/lib/mono \
+ /usr/local/julia* \
+ /usr/local/lib/android \
+ /usr/local/lib/node_modules \
+ /usr/local/share/chromium \
+ /usr/local/share/powershell \
+ /usr/share/dotnet \
+ /usr/share/swift
- shell: bash -el {0}
working-directory: ${{ github.workspace }}/flyte
run: |
From 7a300ac43f3da41a4e01bd4dae9d45e8c0094ce3 Mon Sep 17 00:00:00 2001
From: "Thomas J. Fan"
Date: Tue, 13 Aug 2024 14:54:36 -0400
Subject: [PATCH 02/35] Fixes imagespec documentation (#1719)
Signed-off-by: Thomas J. Fan
Co-authored-by: Kevin Su
---
examples/development_lifecycle/development_lifecycle/decks.py | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/examples/development_lifecycle/development_lifecycle/decks.py b/examples/development_lifecycle/development_lifecycle/decks.py
index 57f9fff22..d849bd339 100644
--- a/examples/development_lifecycle/development_lifecycle/decks.py
+++ b/examples/development_lifecycle/development_lifecycle/decks.py
@@ -12,7 +12,9 @@
# For more information, see
# https://docs.flyte.org/en/latest/user_guide/customizing_dependencies/imagespec.html#image-spec-example
-custom_image = ImageSpec(packages=["plotly"], registry="ghcr.io/flyteorg")
+custom_image = ImageSpec(
+ packages=["plotly", "scikit-learn", "flytekitplugins-deck-standard"], registry="ghcr.io/flyteorg"
+)
if custom_image.is_container():
import plotly
From a8fbfea323b7bd4b212d5c7041666d240132c24e Mon Sep 17 00:00:00 2001
From: "Thomas J. Fan"
Date: Thu, 22 Aug 2024 15:50:11 -0400
Subject: [PATCH 03/35] Fixes some CI failures when serializing plugins (#1721)
Signed-off-by: Thomas J. Fan
---
.github/workflows/checks.yml | 8 +++----
examples/data_types_and_io/Dockerfile | 7 ++++--
examples/data_types_and_io/requirements.in | 1 +
examples/development_lifecycle/Dockerfile | 4 +++-
.../development_lifecycle/requirements.in | 1 +
examples/duckdb_plugin/Dockerfile | 4 +++-
examples/duckdb_plugin/requirements.in | 1 +
.../pytorch_lightning_mnist_autoencoder.py | 23 +------------------
.../productionizing/reference_task.py | 13 ++++-------
9 files changed, 24 insertions(+), 38 deletions(-)
diff --git a/.github/workflows/checks.yml b/.github/workflows/checks.yml
index 316dc57a2..9c7b9fb10 100644
--- a/.github/workflows/checks.yml
+++ b/.github/workflows/checks.yml
@@ -87,8 +87,8 @@ jobs:
working-directory: examples/${{ matrix.example }}
run: |
pip install uv
- uv venv
- source .venv/bin/activate
+ uv venv $GITHUB_WORKSPACE/.venv
+ source $GITHUB_WORKSPACE/.venv/bin/activate
if [ -f requirements.in ]; then uv pip install -r requirements.in; fi
uv pip install "flytekit>=1.12.2" "numpy<2.0.0"
pip freeze
@@ -126,7 +126,7 @@ jobs:
run: |
export FLYTE_PUSH_IMAGE_SPEC=${{ github.event_name != 'pull_request' }}
default_image=ghcr.io/flyteorg/flytecookbook:${{ matrix.example }}-${{ github.sha }}
- source .venv/bin/activate
+ source $GITHUB_WORKSPACE/.venv/bin/activate
pyflyte \
--pkgs ${{ matrix.example }} package \
--image $default_image \
@@ -293,7 +293,7 @@ jobs:
pip install uv
uv venv
source .venv/bin/activate
- uv pip install "flytekit>=1.12.2" flytekitplugins-deck-standard torch tabulate
+ uv pip install "flytekit>=1.12.2" flytekitplugins-deck-standard torch tabulate pyarrow
pip freeze
- name: Checkout flytesnacks
uses: actions/checkout@v3
diff --git a/examples/data_types_and_io/Dockerfile b/examples/data_types_and_io/Dockerfile
index db5734820..d93ea429b 100644
--- a/examples/data_types_and_io/Dockerfile
+++ b/examples/data_types_and_io/Dockerfile
@@ -1,3 +1,4 @@
+#syntax=docker/dockerfile:1.8
# ######################
# NOTE: For CI/CD only #
########################
@@ -17,8 +18,10 @@ ENV VENV /opt/venv
RUN python3 -m venv ${VENV}
ENV PATH="${VENV}/bin:$PATH"
-RUN pip install flytekit pandas
-RUN pip install torch --index-url https://download.pytorch.org/whl/cpu
+RUN --mount=type=cache,sharing=locked,mode=0777,target=/root/.cache/pip,id=pip \
+ pip install flytekit pandas pyarrow
+RUN --mount=type=cache,sharing=locked,mode=0777,target=/root/.cache/pip,id=pip \
+ pip install torch --index-url https://download.pytorch.org/whl/cpu
# Copy the actual code
COPY . /root
diff --git a/examples/data_types_and_io/requirements.in b/examples/data_types_and_io/requirements.in
index f33049738..79bd303e5 100644
--- a/examples/data_types_and_io/requirements.in
+++ b/examples/data_types_and_io/requirements.in
@@ -1,3 +1,4 @@
pandas
torch
tabulate
+pyarrow
diff --git a/examples/development_lifecycle/Dockerfile b/examples/development_lifecycle/Dockerfile
index 834f5dd29..a704a214d 100644
--- a/examples/development_lifecycle/Dockerfile
+++ b/examples/development_lifecycle/Dockerfile
@@ -1,3 +1,4 @@
+#syntax=docker/dockerfile:1.8
# ######################
# NOTE: For CI/CD only #
########################
@@ -21,7 +22,8 @@ ENV PATH="${VENV}/bin:$PATH"
# Install Python dependencies
COPY requirements.in /root
-RUN pip install -r /root/requirements.in
+RUN --mount=type=cache,sharing=locked,mode=0777,target=/root/.cache/pip,id=pip \
+ pip install -r /root/requirements.in
# Copy the actual code
COPY . /root
diff --git a/examples/development_lifecycle/requirements.in b/examples/development_lifecycle/requirements.in
index 980271fac..8e50db9c6 100644
--- a/examples/development_lifecycle/requirements.in
+++ b/examples/development_lifecycle/requirements.in
@@ -3,3 +3,4 @@ flytekitplugins-deck-standard
plotly
scikit-learn
tabulate
+pyarrow
diff --git a/examples/duckdb_plugin/Dockerfile b/examples/duckdb_plugin/Dockerfile
index ef4f356dd..91901970a 100644
--- a/examples/duckdb_plugin/Dockerfile
+++ b/examples/duckdb_plugin/Dockerfile
@@ -1,3 +1,4 @@
+#syntax=docker/dockerfile:1.8
FROM python:3.8-buster
WORKDIR /root
@@ -25,7 +26,8 @@ ENV PATH="${VENV}/bin:$PATH"
# Install Python dependencies
COPY requirements.in /root/
-RUN pip install -r /root/requirements.in
+RUN --mount=type=cache,sharing=locked,mode=0777,target=/root/.cache/pip,id=pip \
+ pip install -r /root/requirements.in
# Copy the actual code
COPY . /root/
diff --git a/examples/duckdb_plugin/requirements.in b/examples/duckdb_plugin/requirements.in
index f4180eb1a..4f8699977 100644
--- a/examples/duckdb_plugin/requirements.in
+++ b/examples/duckdb_plugin/requirements.in
@@ -3,3 +3,4 @@ wheel
matplotlib
flytekitplugins-deck-standard
flytekitplugins-duckdb
+pyarrow
diff --git a/examples/kfpytorch_plugin/kfpytorch_plugin/pytorch_lightning_mnist_autoencoder.py b/examples/kfpytorch_plugin/kfpytorch_plugin/pytorch_lightning_mnist_autoencoder.py
index e5e819e6d..ddc4409aa 100644
--- a/examples/kfpytorch_plugin/kfpytorch_plugin/pytorch_lightning_mnist_autoencoder.py
+++ b/examples/kfpytorch_plugin/kfpytorch_plugin/pytorch_lightning_mnist_autoencoder.py
@@ -10,17 +10,10 @@
import os
import lightning as L
-from flytekit import ImageSpec, PodTemplate, Resources, task, workflow
+from flytekit import ImageSpec, Resources, task, workflow
from flytekit.extras.accelerators import T4
from flytekit.types.directory import FlyteDirectory
from flytekitplugins.kfpytorch.task import Elastic
-from kubernetes.client.models import (
- V1Container,
- V1EmptyDirVolumeSource,
- V1PodSpec,
- V1Volume,
- V1VolumeMount,
-)
from torch import nn, optim
from torch.utils.data import DataLoader
from torchvision.datasets import MNIST
@@ -69,19 +62,6 @@
# ```
# :::
-# %% [markdown]
-# We're also going to define a custom pod template that mounts a shared memory
-# volume to `/dev/shm`. This is necessary for distributed data parallel (DDP)
-# training so that state can be shared across workers.
-
-# %%
-container = V1Container(name=custom_image.name, volume_mounts=[V1VolumeMount(mount_path="/dev/shm", name="dshm")])
-volume = V1Volume(name="dshm", empty_dir=V1EmptyDirVolumeSource(medium="Memory"))
-custom_pod_template = PodTemplate(
- primary_container_name=custom_image.name,
- pod_spec=V1PodSpec(containers=[container], volumes=[volume]),
-)
-
# %% [markdown]
# ## Define a `LightningModule`
#
@@ -175,7 +155,6 @@ def train_dataloader(self):
),
accelerator=T4,
requests=Resources(mem="32Gi", cpu="48", gpu="8", ephemeral_storage="100Gi"),
- pod_template=custom_pod_template,
)
def train_model(dataloader_num_workers: int) -> FlyteDirectory:
"""Train an autoencoder model on the MNIST."""
diff --git a/examples/productionizing/productionizing/reference_task.py b/examples/productionizing/productionizing/reference_task.py
index 29c6efc46..a64d46468 100644
--- a/examples/productionizing/productionizing/reference_task.py
+++ b/examples/productionizing/productionizing/reference_task.py
@@ -1,6 +1,6 @@
-from typing import List, NamedTuple
+from typing import List
-from flytekit import StructuredDataset, reference_task, workflow
+from flytekit import reference_task, workflow
from flytekit.types.file import FlyteFile
# A `flytekit.reference_task` references the Flyte tasks that have already been defined, serialized, and registered.
@@ -26,22 +26,19 @@ def normalize_columns(
...
-outputs = NamedTuple("Outputs", results=StructuredDataset)
-
-
@reference_task(
project="flytesnacks",
domain="development",
- name="bigquery",
+ name="sql.bigquery.no_io",
version="{{ registration.version }}",
)
-def bigquery_task(version: int) -> outputs:
+def bigquery_task():
...
@workflow
def wf() -> FlyteFile:
- bigquery_task(version=1)
+ bigquery_task()
return normalize_columns(
csv_url="https://people.sc.fsu.edu/~jburkardt/data/csv/biostats.csv",
column_names=["Name", "Sex", "Age", "Heights (in)", "Weight (lbs)"],
From 5289896956b20ecb1e58423a54d09d3090e0e21f Mon Sep 17 00:00:00 2001
From: Eduardo Apolinario <653394+eapolinario@users.noreply.github.com>
Date: Wed, 28 Aug 2024 16:09:43 -0400
Subject: [PATCH 04/35] Set python version to 3.9 in spark examples (#1725)
Signed-off-by: Eduardo Apolinario
Co-authored-by: Eduardo Apolinario
---
examples/k8s_spark_plugin/k8s_spark_plugin/dataframe_passing.py | 2 +-
examples/k8s_spark_plugin/k8s_spark_plugin/pyspark_pi.py | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/examples/k8s_spark_plugin/k8s_spark_plugin/dataframe_passing.py b/examples/k8s_spark_plugin/k8s_spark_plugin/dataframe_passing.py
index f672ebc6e..4d009a834 100644
--- a/examples/k8s_spark_plugin/k8s_spark_plugin/dataframe_passing.py
+++ b/examples/k8s_spark_plugin/k8s_spark_plugin/dataframe_passing.py
@@ -19,7 +19,7 @@
# %% [markdown]
# Create an `ImageSpec` to automate the retrieval of a prebuilt Spark image.
# %%
-custom_image = ImageSpec(registry="ghcr.io/flyteorg", packages=["flytekitplugins-spark"])
+custom_image = ImageSpec(python_version="3.9", registry="ghcr.io/flyteorg", packages=["flytekitplugins-spark"])
# %% [markdown]
# :::{important}
diff --git a/examples/k8s_spark_plugin/k8s_spark_plugin/pyspark_pi.py b/examples/k8s_spark_plugin/k8s_spark_plugin/pyspark_pi.py
index d79b1705e..25332adad 100644
--- a/examples/k8s_spark_plugin/k8s_spark_plugin/pyspark_pi.py
+++ b/examples/k8s_spark_plugin/k8s_spark_plugin/pyspark_pi.py
@@ -16,7 +16,7 @@
# %% [markdown]
# Create an `ImageSpec` to automate the retrieval of a prebuilt Spark image.
# %%
-custom_image = ImageSpec(registry="ghcr.io/flyteorg", packages=["flytekitplugins-spark"])
+custom_image = ImageSpec(python_version="3.9", registry="ghcr.io/flyteorg", packages=["flytekitplugins-spark"])
# %% [markdown]
From 89bf7bc7788802097904c5f9ffb75ba70ef980a6 Mon Sep 17 00:00:00 2001
From: Kevin Su
Date: Wed, 28 Aug 2024 22:30:16 -0700
Subject: [PATCH 05/35] Add an example for echo task (#1724)
Signed-off-by: Kevin Su
Signed-off-by: Kevin Su
Co-authored-by: Nikki Everett
---
.../advanced_composition/conditional.py | 32 +++++++++++++++++++
1 file changed, 32 insertions(+)
diff --git a/examples/advanced_composition/advanced_composition/conditional.py b/examples/advanced_composition/advanced_composition/conditional.py
index 9c6cf5d38..7c314162b 100644
--- a/examples/advanced_composition/advanced_composition/conditional.py
+++ b/examples/advanced_composition/advanced_composition/conditional.py
@@ -1,6 +1,7 @@
import random
from flytekit import conditional, task, workflow
+from flytekit.core.task import Echo
# Simple branch
@@ -177,6 +178,37 @@ def consume_task_output(radius: float, seed: int = 5) -> float:
)
+# Running a noop task in a conditional
+#
+# In some cases, you may want to skip the execution of a conditional workflow
+# if a certain condition is not met.
+# You can achieve this by using the `echo` task, which simply returns the input value.
+
+# :::{note}
+# To enable the echo plugin in the backend, add the plugin to Flyte's configuration file.
+# ```yaml
+# task-plugins:
+# enabled-plugins:
+# - echo
+# ```
+# :::
+
+
+echo = Echo(name="echo", inputs={"radius": float})
+
+
+@workflow
+def noop_in_conditional(radius: float, seed: int = 5) -> float:
+ is_heads = coin_toss(seed=seed)
+ return (
+ conditional("noop_in_conditional")
+ .if_(is_heads.is_true())
+ .then(calculate_circle_circumference(radius=radius))
+ .else_()
+ .then(echo(radius=radius))
+ )
+
+
# Run the workflow locally
if __name__ == "__main__":
default_seed_output = consume_task_output(radius=0.4)
From 8613c49cf9664eef38352d0daaa70c9dec8e4c05 Mon Sep 17 00:00:00 2001
From: Kevin Su
Date: Wed, 4 Sep 2024 18:24:52 -0700
Subject: [PATCH 06/35] fix(Dockerfile): unpin flytekit version (#1727)
---
examples/advanced_composition/Dockerfile | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/examples/advanced_composition/Dockerfile b/examples/advanced_composition/Dockerfile
index e14e58771..0e2cd1bd9 100644
--- a/examples/advanced_composition/Dockerfile
+++ b/examples/advanced_composition/Dockerfile
@@ -17,7 +17,7 @@ ENV VENV /opt/venv
RUN python3 -m venv ${VENV}
ENV PATH="${VENV}/bin:$PATH"
-RUN pip install flytekit==1.12.0
+RUN pip install flytekit
# Copy the actual code
COPY . /root
From beab2f98bee8d5ebfa15aaac0fb44274e529efb5 Mon Sep 17 00:00:00 2001
From: Kevin Su
Date: Sun, 8 Sep 2024 23:56:09 -0700
Subject: [PATCH 07/35] fix dataframe_passing.py (#1728)
Signed-off-by: Kevin Su
---
.../k8s_spark_plugin/k8s_spark_plugin/dataframe_passing.py | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/examples/k8s_spark_plugin/k8s_spark_plugin/dataframe_passing.py b/examples/k8s_spark_plugin/k8s_spark_plugin/dataframe_passing.py
index 4d009a834..b4d2c7536 100644
--- a/examples/k8s_spark_plugin/k8s_spark_plugin/dataframe_passing.py
+++ b/examples/k8s_spark_plugin/k8s_spark_plugin/dataframe_passing.py
@@ -19,7 +19,9 @@
# %% [markdown]
# Create an `ImageSpec` to automate the retrieval of a prebuilt Spark image.
# %%
-custom_image = ImageSpec(python_version="3.9", registry="ghcr.io/flyteorg", packages=["flytekitplugins-spark"])
+custom_image = ImageSpec(
+ python_version="3.9", registry="ghcr.io/flyteorg", packages=["flytekitplugins-spark", "pyarrow"]
+)
# %% [markdown]
# :::{important}
From 0c308d19333d495986229b25169b5e28dadfecc6 Mon Sep 17 00:00:00 2001
From: Kevin Su
Date: Mon, 9 Sep 2024 01:45:22 -0700
Subject: [PATCH 08/35] fix task_cache.py (#1729)
---
.../development_lifecycle/development_lifecycle/task_cache.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/examples/development_lifecycle/development_lifecycle/task_cache.py b/examples/development_lifecycle/development_lifecycle/task_cache.py
index 712e06db3..48f3d6cf7 100644
--- a/examples/development_lifecycle/development_lifecycle/task_cache.py
+++ b/examples/development_lifecycle/development_lifecycle/task_cache.py
@@ -11,7 +11,7 @@
image_spec = ImageSpec(
registry="ghcr.io/flyteorg",
- packages=["pandas"],
+ packages=["pandas", "pyarrow"],
)
From 63e580cf02f9a9439fadd813b11d670d1942470c Mon Sep 17 00:00:00 2001
From: Kevin Su
Date: Tue, 10 Sep 2024 00:46:59 -0700
Subject: [PATCH 09/35] Fix pandera example (#1731)
Fix pandera example
Signed-off-by: Kevin Su
---
examples/pandera_plugin/pandera_plugin/basic_schema_example.py | 2 +-
.../pandera_plugin/validating_and_testing_ml_pipelines.py | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/examples/pandera_plugin/pandera_plugin/basic_schema_example.py b/examples/pandera_plugin/pandera_plugin/basic_schema_example.py
index 8c8d36b2a..adc164f4d 100644
--- a/examples/pandera_plugin/pandera_plugin/basic_schema_example.py
+++ b/examples/pandera_plugin/pandera_plugin/basic_schema_example.py
@@ -15,7 +15,7 @@
from flytekit import ImageSpec, task, workflow
from pandera.typing import DataFrame, Series
-custom_image = ImageSpec(registry="ghcr.io/flyteorg", packages=["flytekitplugins-pandera", "scikit-learn"])
+custom_image = ImageSpec(registry="ghcr.io/flyteorg", packages=["flytekitplugins-pandera", "scikit-learn", "pyarrow"])
# %% [markdown]
# ## A Simple Data Processing Pipeline
diff --git a/examples/pandera_plugin/pandera_plugin/validating_and_testing_ml_pipelines.py b/examples/pandera_plugin/pandera_plugin/validating_and_testing_ml_pipelines.py
index 85fdef648..2cb649312 100644
--- a/examples/pandera_plugin/pandera_plugin/validating_and_testing_ml_pipelines.py
+++ b/examples/pandera_plugin/pandera_plugin/validating_and_testing_ml_pipelines.py
@@ -52,7 +52,7 @@
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
-custom_image = ImageSpec(registry="ghcr.io/flyteorg", packages=["flytekitplugins-pandera", "scikit-learn"])
+custom_image = ImageSpec(registry="ghcr.io/flyteorg", packages=["flytekitplugins-pandera", "scikit-learn", "pyarrow"])
# %% [markdown]
# We also need to import the `pandera` flytekit plugin to enable dataframe runtime type-checking:
From 1b4d46d04f3d11b8d70e78a421719abcd4887756 Mon Sep 17 00:00:00 2001
From: Kevin Su
Date: Fri, 13 Sep 2024 18:15:20 -0700
Subject: [PATCH 10/35] unpin whylogs (#1732)
Signed-off-by: Kevin Su
---
.github/workflows/checks.yml | 6 +++---
.github/workflows/serialize_example.yml | 4 ++--
examples/whylogs_plugin/requirements.in | 2 +-
3 files changed, 6 insertions(+), 6 deletions(-)
diff --git a/.github/workflows/checks.yml b/.github/workflows/checks.yml
index 9c7b9fb10..ead9fa466 100644
--- a/.github/workflows/checks.yml
+++ b/.github/workflows/checks.yml
@@ -136,7 +136,7 @@ jobs:
--force
tar -xvf flyte-package.tgz
- name: Upload artifacts
- uses: actions/upload-artifact@v2
+ uses: actions/upload-artifact@v4
with:
name: snacks-examples-${{ matrix.example }}
path: examples/${{ matrix.example }}/**/*.pb
@@ -155,7 +155,7 @@ jobs:
run: |
mkdir download-artifact
- name: Download artifacts
- uses: actions/download-artifact@v2
+ uses: actions/download-artifact@v4
with:
path: ./download-artifact/
- name: setup sandbox
@@ -216,7 +216,7 @@ jobs:
run: |
mkdir download-artifact
- name: Download artifacts
- uses: actions/download-artifact@v2
+ uses: actions/download-artifact@v4
with:
path: ./download-artifact/
- name: Package Examples
diff --git a/.github/workflows/serialize_example.yml b/.github/workflows/serialize_example.yml
index 4d1f38c78..8b5442532 100644
--- a/.github/workflows/serialize_example.yml
+++ b/.github/workflows/serialize_example.yml
@@ -38,7 +38,7 @@ jobs:
./scripts/serialize-example.sh ${{ matrix.directory }} ${{ github.sha }}
tar -xvf ${{ matrix.directory }}/flyte-package.tgz -C ${{ matrix.directory }}
- name: Upload artifacts
- uses: actions/upload-artifact@v2
+ uses: actions/upload-artifact@v4
with:
name: snacks-${{ steps.example_id.outputs.EXAMPLE_ID }}
path: ${{ matrix.directory }}/**/*.pb
@@ -57,7 +57,7 @@ jobs:
run: |
mkdir download-artifact
- name: Download artifacts
- uses: actions/download-artifact@v2
+ uses: actions/download-artifact@v4
with:
path: ./download-artifact/
- name: setup sandbox
diff --git a/examples/whylogs_plugin/requirements.in b/examples/whylogs_plugin/requirements.in
index 0091e81e3..3f8bc07aa 100644
--- a/examples/whylogs_plugin/requirements.in
+++ b/examples/whylogs_plugin/requirements.in
@@ -4,6 +4,6 @@ matplotlib
flytekitplugins-deck-standard
flytekitplugins-whylogs>=1.1.1b0
scikit-learn
-whylogs[s3]==1.3.30
+whylogs[s3]
whylogs[mlflow]
whylogs[whylabs]
From a94aa970b1e74f44acc498dff777410355867cfe Mon Sep 17 00:00:00 2001
From: Samhita Alla
Date: Thu, 19 Sep 2024 19:39:44 +0530
Subject: [PATCH 11/35] add ollama docs (#1722)
* add ollama docs
Signed-off-by: Samhita Alla
* update doc
Signed-off-by: Samhita Alla
* update index
Signed-off-by: Samhita Alla
* add system prompt as task input
Signed-off-by: Samhita Alla
* add user prompt
Signed-off-by: Samhita Alla
* set gpu to 1 in ollama initialization
Signed-off-by: Samhita Alla
* remove modelfile
Signed-off-by: Samhita Alla
* bump version
Signed-off-by: Samhita Alla
* fix code
Signed-off-by: Samhita Alla
---------
Signed-off-by: Samhita Alla
---
docs/index.md | 1 +
docs/integrations.md | 2 +
examples/nim_plugin/README.md | 2 +-
examples/ollama_plugin/Dockerfile | 23 +++++
examples/ollama_plugin/README.md | 36 +++++++
.../ollama_plugin/ollama_plugin/__init__.py | 0
.../ollama_plugin/ollama_plugin/serve_llm.py | 99 +++++++++++++++++++
examples/ollama_plugin/requirements.in | 1 +
8 files changed, 163 insertions(+), 1 deletion(-)
create mode 100644 examples/ollama_plugin/Dockerfile
create mode 100644 examples/ollama_plugin/README.md
create mode 100644 examples/ollama_plugin/ollama_plugin/__init__.py
create mode 100644 examples/ollama_plugin/ollama_plugin/serve_llm.py
create mode 100644 examples/ollama_plugin/requirements.in
diff --git a/docs/index.md b/docs/index.md
index a67bfb480..027058e5c 100644
--- a/docs/index.md
+++ b/docs/index.md
@@ -120,6 +120,7 @@ auto_examples/mmcloud_agent/index
auto_examples/modin_plugin/index
auto_examples/kfmpi_plugin/index
auto_examples/nim_plugin/index
+auto_examples/ollama_plugin/index
auto_examples/onnx_plugin/index
auto_examples/openai_batch_agent/index
auto_examples/papermill_plugin/index
diff --git a/docs/integrations.md b/docs/integrations.md
index f4ff3e08f..b763017ad 100644
--- a/docs/integrations.md
+++ b/docs/integrations.md
@@ -44,6 +44,8 @@ Flytekit functionality. These plugins can be anything and for comparison can be
- `wandb`: Machine learning platform to build better models faster.
* - {doc}`NIM <auto_examples/nim_plugin/index>`
- Serve optimized model containers with NIM.
+* - {doc}`Ollama <auto_examples/ollama_plugin/index>`
+ - Serve fine-tuned LLMs with Ollama in a Flyte workflow.
```
:::{dropdown} {fa}`info-circle` Using flytekit plugins
diff --git a/examples/nim_plugin/README.md b/examples/nim_plugin/README.md
index 506c9eab9..36011695b 100644
--- a/examples/nim_plugin/README.md
+++ b/examples/nim_plugin/README.md
@@ -29,7 +29,7 @@ pip install flytekitplugins-inference
For a usage example, see {doc}`NIM example usage `.
```{note}
-NIM can only be run in a Flyte cluster, not locally, as it must be deployed as a sidecar service in a Kubernetes pod.
+NIM can only be run in a Flyte cluster as it must be deployed as a sidecar service in a Kubernetes pod.
```
```{toctree}
diff --git a/examples/ollama_plugin/Dockerfile b/examples/ollama_plugin/Dockerfile
new file mode 100644
index 000000000..0c46be23a
--- /dev/null
+++ b/examples/ollama_plugin/Dockerfile
@@ -0,0 +1,23 @@
+# ######################
+# NOTE: For CI/CD only #
+########################
+FROM python:3.11-slim-buster
+LABEL org.opencontainers.image.source=https://github.com/flyteorg/flytesnacks
+
+WORKDIR /root
+ENV VENV /opt/venv
+ENV LANG C.UTF-8
+ENV LC_ALL C.UTF-8
+ENV PYTHONPATH /root
+
+# Install Python dependencies
+COPY requirements.in /root
+RUN pip install -r /root/requirements.in
+
+# Copy the actual code
+COPY . /root/
+
+# This tag is supplied by the build script and will be used to determine the version
+# when registering tasks, workflows, and launch plans
+ARG tag
+ENV FLYTE_INTERNAL_IMAGE $tag
diff --git a/examples/ollama_plugin/README.md b/examples/ollama_plugin/README.md
new file mode 100644
index 000000000..75b97f0fb
--- /dev/null
+++ b/examples/ollama_plugin/README.md
@@ -0,0 +1,36 @@
+(ollama_plugin)=
+
+# Ollama
+
+```{eval-rst}
+.. tags:: Inference, LLM
+```
+
+Serve large language models (LLMs) in a Flyte task.
+
+[Ollama](https://ollama.com/) simplifies the process of serving fine-tuned LLMs.
+Whether you're generating predictions from a customized model or deploying it across different hardware setups,
+Ollama enables you to encapsulate the entire workflow in a single pipeline.
+
+## Installation
+
+To use the Ollama plugin, run the following command:
+
+```
+pip install flytekitplugins-inference
+```
+
+## Example usage
+
+For a usage example, see {doc}`Ollama example usage <serve_llm>`.
+
+```{note}
+Ollama can only be run in a Flyte cluster as it must be deployed as a sidecar service in a Kubernetes pod.
+```
+
+```{toctree}
+:maxdepth: -1
+:hidden:
+
+serve_llm
+```
diff --git a/examples/ollama_plugin/ollama_plugin/__init__.py b/examples/ollama_plugin/ollama_plugin/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/examples/ollama_plugin/ollama_plugin/serve_llm.py b/examples/ollama_plugin/ollama_plugin/serve_llm.py
new file mode 100644
index 000000000..f96ef7252
--- /dev/null
+++ b/examples/ollama_plugin/ollama_plugin/serve_llm.py
@@ -0,0 +1,99 @@
+# %% [markdown]
+# (serve_llm)=
+#
+# # Serve LLMs with Ollama
+#
+# In this guide, you'll learn how to locally serve Gemma2 and fine-tuned Llama3 models using Ollama within a Flyte task.
+#
+# Start by importing Ollama from the `flytekitplugins.inference` package and specifying the desired model name.
+#
+# Below is a straightforward example of serving a Gemma2 model:
+# %%
+from flytekit import ImageSpec, Resources, task
+from flytekit.extras.accelerators import A10G
+from flytekitplugins.inference import Model, Ollama
+from openai import OpenAI
+
+image = ImageSpec(
+ name="ollama_serve",
+ registry="ghcr.io/flyteorg",
+ packages=["flytekitplugins-inference"],
+ builder="default",
+)
+
+ollama_instance = Ollama(model=Model(name="gemma2"), gpu="1")
+
+
+@task(
+ container_image=image,
+ pod_template=ollama_instance.pod_template,
+ accelerator=A10G,
+ requests=Resources(gpu="0"),
+)
+def model_serving(user_prompt: str) -> str:
+ client = OpenAI(base_url=f"{ollama_instance.base_url}/v1", api_key="ollama") # api key required but ignored
+
+ completion = client.chat.completions.create(
+ model="gemma2",
+ messages=[
+ {
+ "role": "user",
+ "content": user_prompt,
+ }
+ ],
+ temperature=0.5,
+ top_p=1,
+ max_tokens=1024,
+ )
+
+ return completion.choices[0].message.content
+
+
+# %% [markdown]
+# :::{important}
+# Replace `ghcr.io/flyteorg` with a container registry to which you can publish.
+# To upload the image to the local registry in the demo cluster, indicate the registry as `localhost:30000`.
+# :::
+#
+# The `model_serving` task initiates a sidecar service to serve the model, making it accessible on localhost via the `base_url` property.
+# You can use either the chat or chat completion endpoints.
+#
+# By default, Ollama initializes the server with `cpu`, `gpu`, and `mem` set to `1`, `1`, and `15Gi`, respectively.
+# You can adjust these settings to meet your requirements.
+#
+# To serve a fine-tuned model, provide the model configuration as `modelfile` within the `Model` dataclass.
+#
+# Below is an example of specifying a fine-tuned LoRA adapter for a Llama3 Mario model:
+# %%
+from flytekit.types.file import FlyteFile
+
+finetuned_ollama_instance = Ollama(
+ model=Model(
+ name="llama3-mario",
+ modelfile="FROM llama3\nADAPTER {inputs.ggml}\nPARAMETER temperature 1\nPARAMETER num_ctx 4096\nSYSTEM {inputs.system_prompt}",
+ ),
+ gpu="1",
+)
+
+
+@task(
+ container_image=image,
+ pod_template=finetuned_ollama_instance.pod_template,
+ accelerator=A10G,
+ requests=Resources(gpu="0"),
+)
+def finetuned_model_serving(ggml: FlyteFile, system_prompt: str):
+ ...
+
+
+# %% [markdown]
+# `{inputs.ggml}` and `{inputs.system_prompt}` are materialized at run time, with `ggml` and `system_prompt` available as inputs to the task.
+#
+# Ollama models can be integrated into different stages of your AI workflow, including data pre-processing,
+# model inference, and post-processing. Flyte also allows serving multiple Ollama models simultaneously
+# on various instances.
+#
+# This integration enables you to self-host and serve AI models on your own infrastructure,
+# ensuring full control over costs and data security.
+#
+# For more detailed information on the models natively supported by Ollama, visit the [Ollama models library](https://ollama.com/library).
diff --git a/examples/ollama_plugin/requirements.in b/examples/ollama_plugin/requirements.in
new file mode 100644
index 000000000..a4a684ce6
--- /dev/null
+++ b/examples/ollama_plugin/requirements.in
@@ -0,0 +1 @@
+flytekitplugins-inference>=1.13.6b1
From b7b0f7a4248e175993541b9eba1d0720be5cebf8 Mon Sep 17 00:00:00 2001
From: Future-Outlier
Date: Tue, 24 Sep 2024 15:40:57 +0800
Subject: [PATCH 12/35] Update Databricks agent example to V2 (#1733)
* use V2 instead
Signed-off-by: Future-Outlier
* Update examples/databricks_agent/databricks_agent/databricks_agent_example_usage.py
Co-authored-by: Kevin Su
Signed-off-by: Future-Outlier
* Update examples/databricks_agent/databricks_agent/databricks_agent_example_usage.py
Co-authored-by: Kevin Su
Signed-off-by: Future-Outlier
---------
Signed-off-by: Future-Outlier
Co-authored-by: Kevin Su
---
.../databricks_agent/databricks_agent_example_usage.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/examples/databricks_agent/databricks_agent/databricks_agent_example_usage.py b/examples/databricks_agent/databricks_agent/databricks_agent_example_usage.py
index 9de652bc3..1fe9d3ff9 100644
--- a/examples/databricks_agent/databricks_agent/databricks_agent_example_usage.py
+++ b/examples/databricks_agent/databricks_agent/databricks_agent_example_usage.py
@@ -11,7 +11,7 @@
import flytekit
from flytekit import Resources, task, workflow
-from flytekitplugins.spark import Databricks
+from flytekitplugins.spark import DatabricksV2 as Databricks
# %% [markdown]
From a1dde1984ecd15d18d30e4db3afa62579a580978 Mon Sep 17 00:00:00 2001
From: Future-Outlier
Date: Fri, 27 Sep 2024 02:18:45 +0800
Subject: [PATCH 13/35] Update FlyteDeck Example (#1734)
* use V2 instead
Signed-off-by: Future-Outlier
* update flyte deck example
Signed-off-by: Future-Outlier
* nit
Signed-off-by: Future-Outlier
---------
Signed-off-by: Future-Outlier
---
.../development_lifecycle/decks.py | 24 +++++++++++++------
1 file changed, 17 insertions(+), 7 deletions(-)
diff --git a/examples/development_lifecycle/development_lifecycle/decks.py b/examples/development_lifecycle/development_lifecycle/decks.py
index d849bd339..50cf4daf2 100644
--- a/examples/development_lifecycle/development_lifecycle/decks.py
+++ b/examples/development_lifecycle/development_lifecycle/decks.py
@@ -13,7 +13,17 @@
# https://docs.flyte.org/en/latest/user_guide/customizing_dependencies/imagespec.html#image-spec-example
custom_image = ImageSpec(
- packages=["plotly", "scikit-learn", "flytekitplugins-deck-standard"], registry="ghcr.io/flyteorg"
+ packages=[
+ "flytekitplugins-deck-standard",
+ "markdown",
+ "pandas",
+ "pillow",
+ "plotly",
+ "pyarrow",
+ "scikit-learn",
+ "ydata_profiling",
+ ],
+ registry="ghcr.io/flyteorg",
)
if custom_image.is_container():
@@ -47,7 +57,7 @@ def pca_plot():
from flytekitplugins.deck.renderer import FrameProfilingRenderer
-@task(enable_deck=True)
+@task(enable_deck=True, container_image=custom_image)
def frame_renderer() -> None:
df = pd.DataFrame(data={"col1": [1, 2], "col2": [3, 4]})
flytekit.Deck("Frame Renderer", FrameProfilingRenderer().to_html(df=df))
@@ -61,7 +71,7 @@ def frame_renderer() -> None:
from flytekit.deck import TopFrameRenderer
-@task(enable_deck=True)
+@task(enable_deck=True, container_image=custom_image)
def top_frame_renderer() -> Annotated[pd.DataFrame, TopFrameRenderer(1)]:
return pd.DataFrame(data={"col1": [1, 2], "col2": [3, 4]})
@@ -70,7 +80,7 @@ def top_frame_renderer() -> Annotated[pd.DataFrame, TopFrameRenderer(1)]:
# producing HTML as a Unicode string.
-@task(enable_deck=True)
+@task(enable_deck=True, container_image=custom_image)
def markdown_renderer() -> None:
flytekit.current_context().default_deck.append(
MarkdownRenderer().to_html("You can install flytekit using this command: ```import flytekit```")
@@ -87,7 +97,7 @@ def markdown_renderer() -> None:
from flytekitplugins.deck.renderer import BoxRenderer
-@task(enable_deck=True)
+@task(enable_deck=True, container_image=custom_image)
def box_renderer() -> None:
iris_df = px.data.iris()
flytekit.Deck("Box Plot", BoxRenderer("sepal_length").to_html(iris_df))
@@ -101,7 +111,7 @@ def box_renderer() -> None:
from flytekitplugins.deck.renderer import ImageRenderer
-@task(enable_deck=True)
+@task(enable_deck=True, container_image=custom_image)
def image_renderer(image: FlyteFile) -> None:
flytekit.Deck("Image Renderer", ImageRenderer().to_html(image_src=image))
@@ -117,7 +127,7 @@ def image_renderer_wf(
from flytekitplugins.deck.renderer import TableRenderer
-@task(enable_deck=True)
+@task(enable_deck=True, container_image=custom_image)
def table_renderer() -> None:
flytekit.Deck(
"Table Renderer",
From caadd269af894e50b88c911bbda38ffeaf0a1bb5 Mon Sep 17 00:00:00 2001
From: Nikki Everett
Date: Fri, 4 Oct 2024 13:23:34 -0500
Subject: [PATCH 14/35] Updates for Flyte docs overhaul (#1726)
* moved to flyte repo
Signed-off-by: nikki everett
* update refs
Signed-off-by: nikki everett
* update refs in examples
Signed-off-by: nikki everett
* configure pydata theme
Signed-off-by: nikki everett
* restructure integrations docs
Signed-off-by: nikki everett
* restructure tutorials
Signed-off-by: nikki everett
* fix contributing guide link
Signed-off-by: nikki everett
* remove double h1
Signed-off-by: nikki everett
* remove unnecessary captions
Signed-off-by: nikki everett
* move deprecated integrations back to flytesnacks and remove unnecessary captions from tutorials and integrations toctrees
Signed-off-by: nikki everett
* fix regression
Signed-off-by: nikki everett
* linting
Signed-off-by: nikki everett
* update docs-requirements.txt from new docs-requirements.in
Signed-off-by: nikki everett
* update README
Signed-off-by: nikki everett
* Regenerate all requirements files using python 3.9
Signed-off-by: Eduardo Apolinario
* Regenerate readthedocs requirements files using python 3.12
Signed-off-by: Eduardo Apolinario
* fix formatting for deprecated plugins
Signed-off-by: nikki everett
* Add pandas to requirements.in in a bunch of places
Signed-off-by: Eduardo Apolinario
* Install pandas in e2e-tests
Signed-off-by: Eduardo Apolinario
* use ref for docs contributing guide
Signed-off-by: nikki everett
---------
Signed-off-by: nikki everett
Signed-off-by: Eduardo Apolinario
Co-authored-by: Eduardo Apolinario
---
.github/workflows/checks.yml | 2 +-
.readthedocs.yml | 2 +-
README.md | 14 +-
dev-requirements.txt | 307 +++++-----
docs-requirements.in | 7 +-
docs-requirements.txt | 531 ++++++------------
docs/conf.py | 12 +-
docs/contribute.md | 359 ------------
docs/index.md | 139 +----
.../deprecated_integrations/index.md | 10 +
.../index.md} | 114 ++--
.../bioinformatics/index.md} | 4 +-
.../feature_engineering/index.md} | 10 +-
.../flytelab/index.md} | 0
.../flytelab}/weather_forecasting.md | 0
docs/{tutorials.md => tutorials/index.md} | 35 +-
.../model_training/index.md} | 22 +-
.../airflow_plugin/airflow_plugin/airflow.py | 2 +-
examples/bigquery_agent/requirements.in | 1 +
examples/bigquery_plugin/README.md | 13 +-
.../bigquery_plugin_example.py | 6 +
examples/bigquery_plugin/requirements.in | 1 +
examples/blast/requirements.in | 1 +
examples/databricks_plugin/README.md | 13 +-
.../databricks_plugin_example.py | 6 +
.../development_lifecycle/requirements.in | 1 +
examples/house_price_prediction/README.md | 2 +-
.../house_price_prediction/requirements.in | 1 +
examples/k8s_dask_plugin/README.md | 2 +-
examples/k8s_pod_plugin/README.md | 2 +-
examples/k8s_spark_plugin/README.md | 4 +-
examples/kfpytorch_plugin/README.md | 2 +-
examples/kftensorflow_plugin/README.md | 2 +-
.../mmcloud_agent_example_usage.py | 2 +-
examples/pima_diabetes/requirements.in | 1 +
examples/ray_plugin/README.md | 2 +-
examples/snowflake_plugin/README.md | 13 +-
examples/snowflake_plugin/snowflake_plugin.md | 2 +-
.../snowflake_plugin_example.py | 5 +
examples/whylogs_plugin/requirements.in | 1 +
40 files changed, 550 insertions(+), 1103 deletions(-)
delete mode 100644 docs/contribute.md
create mode 100644 docs/integrations/deprecated_integrations/index.md
rename docs/{integrations.md => integrations/index.md} (63%)
rename docs/{bioinformatics_examples.md => tutorials/bioinformatics/index.md} (81%)
rename docs/{feature_engineering.md => tutorials/feature_engineering/index.md} (62%)
rename docs/{flyte_lab.md => tutorials/flytelab/index.md} (100%)
rename docs/{ => tutorials/flytelab}/weather_forecasting.md (100%)
rename docs/{tutorials.md => tutorials/index.md} (62%)
rename docs/{ml_training.md => tutorials/model_training/index.md} (50%)
diff --git a/.github/workflows/checks.yml b/.github/workflows/checks.yml
index ead9fa466..f88c21953 100644
--- a/.github/workflows/checks.yml
+++ b/.github/workflows/checks.yml
@@ -293,7 +293,7 @@ jobs:
pip install uv
uv venv
source .venv/bin/activate
- uv pip install "flytekit>=1.12.2" flytekitplugins-deck-standard torch tabulate pyarrow
+ uv pip install "flytekit>=1.12.2" flytekitplugins-deck-standard torch tabulate pyarrow pandas
pip freeze
- name: Checkout flytesnacks
uses: actions/checkout@v3
diff --git a/.readthedocs.yml b/.readthedocs.yml
index b1825c215..796b75d7d 100644
--- a/.readthedocs.yml
+++ b/.readthedocs.yml
@@ -7,7 +7,7 @@ version: 2
build:
os: "ubuntu-22.04"
tools:
- python: "3.11"
+ python: "3.12"
apt_packages:
- pandoc
diff --git a/README.md b/README.md
index bd2e73154..f6ad212d4 100644
--- a/README.md
+++ b/README.md
@@ -3,10 +3,10 @@
- Flyte User Guide & Tutorials
+ Flyte tutorials and integrations examples
- Flytesnacks encompasses code examples showcasing Flytekit Python
+ Flytesnacks encompasses tutorials and integrations examples showcasing Flytekit Python
@@ -30,11 +30,9 @@
> To get the hang of Python SDK, refer to the [Getting Started](https://docs.flyte.org/en/latest/getting_started.html) tutorial before exploring the examples.
-The [User Guide](https://docs.flyte.org/projects/cookbook/en/latest/index.html) section has code examples, tips, and tricks that showcase the usage of Flyte features and integrations.
+The [Tutorials](https://docs.flyte.org/en/latest/flytesnacks/tutorials/index.html) section has real-world examples, ranging from machine learning training, data processing to feature engineering.
-The [Tutorials](https://docs.flyte.org/projects/cookbook/en/latest/tutorials.html) section has real-world examples, ranging from machine learning training, data processing to feature engineering.
-
-the [Integrations](https://docs.flyte.org/projects/cookbook/en/latest/integrations.html) section demonstrates how to use Flyte with other tools and frameworks.
+the [Integrations](https://docs.flyte.org/en/latest/flytesnacks/integrations/index.html) section demonstrates how to use Flyte with other tools and frameworks.
> Flytesnacks currently has all examples in Python (Flytekit Python SDK).
@@ -44,7 +42,7 @@ the [Integrations](https://docs.flyte.org/projects/cookbook/en/latest/integratio