diff --git a/.github/workflows/checks.yml b/.github/workflows/checks.yml index 2280b987c..f88c21953 100644 --- a/.github/workflows/checks.yml +++ b/.github/workflows/checks.yml @@ -136,7 +136,7 @@ jobs: --force tar -xvf flyte-package.tgz - name: Upload artifacts - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: name: snacks-examples-${{ matrix.example }} path: examples/${{ matrix.example }}/**/*.pb @@ -155,7 +155,7 @@ jobs: run: | mkdir download-artifact - name: Download artifacts - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v4 with: path: ./download-artifact/ - name: setup sandbox @@ -216,7 +216,7 @@ jobs: run: | mkdir download-artifact - name: Download artifacts - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v4 with: path: ./download-artifact/ - name: Package Examples @@ -293,7 +293,7 @@ jobs: pip install uv uv venv source .venv/bin/activate - uv pip install "flytekit>=1.12.2" flytekitplugins-deck-standard torch tabulate pyarrow + uv pip install "flytekit>=1.12.2" flytekitplugins-deck-standard torch tabulate pyarrow pandas pip freeze - name: Checkout flytesnacks uses: actions/checkout@v3 diff --git a/.github/workflows/monodocs_build.yml b/.github/workflows/monodocs_build.yml index 7460e7bcf..5c4907e22 100644 --- a/.github/workflows/monodocs_build.yml +++ b/.github/workflows/monodocs_build.yml @@ -65,6 +65,7 @@ jobs: shell: bash -el {0} env: FLYTESNACKS_LOCAL_PATH: ${{ github.workspace }}/flytesnacks + DOCSEARCH_API_KEY: fake_docsearch_api_key # must be set to get doc build to succeed run: | conda activate monodocs-env make -C docs html SPHINXOPTS="-W" diff --git a/.github/workflows/serialize_example.yml b/.github/workflows/serialize_example.yml index d96f4e927..8b5442532 100644 --- a/.github/workflows/serialize_example.yml +++ b/.github/workflows/serialize_example.yml @@ -38,7 +38,7 @@ jobs: ./scripts/serialize-example.sh ${{ matrix.directory }} ${{ github.sha }} tar -xvf ${{ matrix.directory }}/flyte-package.tgz -C ${{ matrix.directory }} - name: Upload artifacts - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: name: snacks-${{ steps.example_id.outputs.EXAMPLE_ID }} path: ${{ matrix.directory }}/**/*.pb @@ -57,7 +57,7 @@ jobs: run: | mkdir download-artifact - name: Download artifacts - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v4 with: path: ./download-artifact/ - name: setup sandbox diff --git a/.gitignore b/.gitignore index dcb73da03..cd4020676 100644 --- a/.gitignore +++ b/.gitignore @@ -3,7 +3,6 @@ __pycache__/ .idea .jpg .ipynb_checkpoints/ -*.csv *.dat .DS_Store gen_modules diff --git a/.readthedocs.yml b/.readthedocs.yml index b1825c215..796b75d7d 100644 --- a/.readthedocs.yml +++ b/.readthedocs.yml @@ -7,7 +7,7 @@ version: 2 build: os: "ubuntu-22.04" tools: - python: "3.11" + python: "3.12" apt_packages: - pandoc diff --git a/README.md b/README.md index bd2e73154..5ccbe2ed7 100644 --- a/README.md +++ b/README.md @@ -3,10 +3,10 @@ Flyte Logo

-  Flyte User Guide & Tutorials
+  Flyte tutorials and integration examples

- Flytesnacks encompasses code examples showcasing Flytekit Python
+ Flytesnacks encompasses tutorials and integration examples showcasing Flytekit Python

@@ -30,11 +30,10 @@
> To get the hang of Python SDK, refer to the [Getting Started](https://docs.flyte.org/en/latest/getting_started.html) tutorial before exploring the examples.
-The [User Guide](https://docs.flyte.org/projects/cookbook/en/latest/index.html) section has code examples, tips, and tricks that showcase the usage of Flyte features and integrations.
+The [Tutorials](https://docs.flyte.org/en/latest/flytesnacks/tutorials/index.html) section has real-world examples, ranging from machine learning training and data processing to feature engineering.
-The [Tutorials](https://docs.flyte.org/projects/cookbook/en/latest/tutorials.html) section has real-world examples, ranging from machine learning training, data processing to feature engineering.
+The [Integrations](https://docs.flyte.org/en/latest/flytesnacks/integrations/index.html) section demonstrates how to use Flyte with other tools and frameworks.
-the [Integrations](https://docs.flyte.org/projects/cookbook/en/latest/integrations.html) section demonstrates how to use Flyte with other tools and frameworks.
> Flytesnacks currently has all examples in Python (Flytekit Python SDK).
@@ -44,7 +43,7 @@
-You can find the detailed contribution guide [here](docs/contribute.md).
+You can find the detailed contribution guide [here](https://docs.flyte.org/en/latest/community/contribute_docs.html#contributing-tutorials-and-integrations-examples).
<br>

@@ -52,4 +51,4 @@ You can find the detailed contribution guide [here](docs/contribute.md).

-Refer to the [issues](https://docs.flyte.org/en/latest/community/contribute.html##file-an-issue) section in the contribution guide if you'd like to file an issue. +Refer to the [issues](https://docs.flyte.org/en/latest/community/contribute.html#file-an-issue) section in the contribution guide if you'd like to file an issue. diff --git a/_example_template/Dockerfile b/_example_template/Dockerfile index 65449f624..d56c81ef4 100644 --- a/_example_template/Dockerfile +++ b/_example_template/Dockerfile @@ -1,5 +1,5 @@ FROM python:3.8-slim-buster -LABEL org.opencontainers.image.source https://github.com/flyteorg/flytesnacks +LABEL org.opencontainers.image.source=https://github.com/flyteorg/flytesnacks WORKDIR /root ENV VENV /opt/venv diff --git a/dev-requirements.txt b/dev-requirements.txt index 39a1cdb2d..6716a9987 100644 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -1,188 +1,185 @@ # -# This file is autogenerated by pip-compile with Python 3.10 +# This file is autogenerated by pip-compile with Python 3.12 # by the following command: # -# pip-compile ./dev-requirements.in +# pip-compile dev-requirements.in # -adlfs==2023.10.0 +adlfs==2024.7.0 # via flytekit -aiobotocore==2.5.4 +aiobotocore==2.15.1 # via s3fs -aiohttp==3.9.1 +aiohappyeyeballs==2.4.3 + # via aiohttp +aiohttp==3.10.8 # via # adlfs # aiobotocore # gcsfs # s3fs -aioitertools==0.11.0 +aioitertools==0.12.0 # via aiobotocore aiosignal==1.3.1 # via aiohttp -arrow==1.3.0 - # via cookiecutter -async-timeout==4.0.3 - # via aiohttp -attrs==23.1.0 - # via aiohttp -autoflake==2.2.1 +attrs==24.2.0 + # via + # aiohttp + # jsonlines +autoflake==2.3.1 # via -r dev-requirements.in -azure-core==1.29.5 +azure-core==1.31.0 # via # adlfs # azure-identity # azure-storage-blob azure-datalake-store==0.0.53 # via adlfs -azure-identity==1.15.0 +azure-identity==1.18.0 # via adlfs -azure-storage-blob==12.19.0 +azure-storage-blob==12.23.1 # via adlfs -binaryornot==0.4.4 - # via cookiecutter -botocore==1.31.17 +botocore==1.35.23 # via aiobotocore -cachetools==5.3.2 +cachetools==5.5.0 # via google-auth -certifi==2023.11.17 - # via - # kubernetes - # requests -cffi==1.16.0 +certifi==2024.8.30 + # via requests +cffi==1.17.1 # via # azure-datalake-store # cryptography cfgv==3.4.0 # via pre-commit -chardet==5.2.0 - # via binaryornot charset-normalizer==3.3.2 # via requests click==8.1.7 # via - # cookiecutter + # dask # flytekit # rich-click cloudpickle==3.0.0 - # via flytekit -codespell==2.2.6 + # via + # dask + # flytekit +codespell==2.3.0 # via -r dev-requirements.in -cookiecutter==2.5.0 - # via flytekit -coverage==7.3.2 +coverage==7.6.1 # via -r dev-requirements.in -croniter==2.0.1 +croniter==3.0.3 # via flytekit -cryptography==41.0.7 +cryptography==43.0.1 # via # azure-identity # azure-storage-blob # msal # pyjwt - # secretstorage +dask[array,dataframe]==2024.9.1 + # via + # -r dev-requirements.in + # dask-expr +dask-expr==1.1.15 + # via dask dataclasses-json==0.5.9 # via flytekit decorator==5.1.1 # via gcsfs diskcache==5.6.3 # via flytekit -distlib==0.3.7 +distlib==0.3.8 # via virtualenv -docker==6.1.3 +docker==7.1.0 # via flytekit -docstring-parser==0.15 +docstring-parser==0.16 # via flytekit -exceptiongroup==1.2.0 - # via pytest -filelock==3.13.1 +filelock==3.16.1 # via virtualenv -flyteidl==1.10.6 +flyteidl==1.13.4 # via flytekit -flytekit==1.10.2 +flytekit==1.13.7 # via -r dev-requirements.in -frozenlist==1.4.0 +frozenlist==1.4.1 # via # aiohttp # aiosignal -fsspec==2023.9.2 +fsspec==2024.9.0 + # via # adlfs + # dask # flytekit # gcsfs # s3fs 
-gcsfs==2023.9.2 +gcsfs==2024.9.0.post1 # via flytekit -google-api-core==2.15.0 +google-api-core==2.20.0 # via # google-cloud-core # google-cloud-storage -google-auth==2.25.1 +google-auth==2.35.0 # via # gcsfs # google-api-core # google-auth-oauthlib # google-cloud-core # google-cloud-storage - # kubernetes -google-auth-oauthlib==1.1.0 +google-auth-oauthlib==1.2.1 # via gcsfs google-cloud-core==2.4.1 # via google-cloud-storage -google-cloud-storage==2.13.0 +google-cloud-storage==2.18.2 # via gcsfs -google-crc32c==1.5.0 +google-crc32c==1.6.0 # via # google-cloud-storage # google-resumable-media -google-resumable-media==2.6.0 +google-resumable-media==2.7.2 # via google-cloud-storage -googleapis-common-protos==1.62.0 +googleapis-common-protos==1.65.0 # via # flyteidl # flytekit # google-api-core # grpcio-status -grpcio==1.60.0 + # protoc-gen-openapiv2 +grpcio==1.66.2 # via # flytekit # grpcio-status -grpcio-status==1.60.0 +grpcio-status==1.66.2 # via flytekit -identify==2.5.33 +identify==2.6.1 # via pre-commit -idna==3.6 +idna==3.10 # via # requests # yarl -importlib-metadata==7.0.0 - # via - # flytekit - # keyring +importlib-metadata==8.5.0 + # via flytekit iniconfig==2.0.0 # via pytest isodate==0.6.1 # via azure-storage-blob -jaraco-classes==3.3.0 +jaraco-classes==3.4.0 + # via keyring +jaraco-context==6.0.1 + # via keyring +jaraco-functools==4.1.0 # via keyring -jeepney==0.8.0 - # via - # keyring - # secretstorage -jinja2==3.1.2 - # via cookiecutter jmespath==1.0.1 # via botocore -joblib==1.3.2 +joblib==1.4.2 # via flytekit -jsonpickle==3.0.2 +jsonlines==4.0.0 # via flytekit -keyring==24.3.0 +jsonpickle==3.3.0 # via flytekit -kubernetes==28.1.0 +keyring==25.4.1 # via flytekit +locket==1.0.0 + # via partd markdown-it-py==3.0.0 - # via rich -markupsafe==2.1.3 - # via jinja2 -marshmallow==3.20.1 + # via + # flytekit + # rich +marshmallow==3.22.0 # via # dataclasses-json # marshmallow-enum @@ -193,7 +190,7 @@ marshmallow-enum==1.5.1 # flytekit marshmallow-jsonschema==0.13.0 # via flytekit -mashumaro==3.11 +mashumaro==3.13.1 # via # -r dev-requirements.in # flytekit @@ -201,157 +198,151 @@ mdurl==0.1.2 # via markdown-it-py mock==5.1.0 # via -r dev-requirements.in -more-itertools==10.1.0 - # via jaraco-classes -msal==1.26.0 +more-itertools==10.5.0 + # via + # jaraco-classes + # jaraco-functools +msal==1.31.0 # via # azure-datalake-store # azure-identity # msal-extensions -msal-extensions==1.0.0 +msal-extensions==1.2.0 # via azure-identity -multidict==6.0.4 +multidict==6.1.0 # via # aiohttp # yarl -mypy==1.7.1 +mypy==1.11.2 # via -r dev-requirements.in mypy-extensions==1.0.0 # via # mypy # typing-inspect -nodeenv==1.8.0 +nodeenv==1.9.1 # via pre-commit -numpy==1.26.2 +numpy==2.1.1 # via - # flytekit + # dask # pandas # pyarrow + # scipy + # xgboost oauthlib==3.2.2 + # via requests-oauthlib +packaging==24.1 # via - # kubernetes - # requests-oauthlib -packaging==23.2 - # via - # docker + # dask # marshmallow # pytest -pandas==1.5.3 - # via flytekit -platformdirs==4.1.0 +pandas==2.2.3 # via - # virtualenv -pluggy==1.3.0 + # dask + # dask-expr +partd==1.4.2 + # via dask +platformdirs==4.3.6 + # via virtualenv +pluggy==1.5.0 # via pytest -portalocker==2.8.2 +portalocker==2.10.1 # via msal-extensions -pre-commit==3.5.0 +pre-commit==3.8.0 # via -r dev-requirements.in -protobuf==4.24.4 +proto-plus==1.24.0 + # via google-api-core +protobuf==5.28.2 # via # flyteidl # flytekit # google-api-core # googleapis-common-protos # grpcio-status - # protoc-gen-swagger -protoc-gen-swagger==0.1.0 + # proto-plus + # 
protoc-gen-openapiv2 +protoc-gen-openapiv2==0.0.1 # via flyteidl -pyarrow==14.0.1 - # via flytekit -pyasn1==0.5.1 +pyarrow==17.0.0 + # via dask-expr +pyasn1==0.6.1 # via # pyasn1-modules # rsa -pyasn1-modules==0.3.0 +pyasn1-modules==0.4.1 # via google-auth -pycparser==2.21 +pycparser==2.22 # via cffi -pygments==2.17.2 - # via rich -pyjwt[crypto]==2.8.0 +pyflakes==3.2.0 + # via autoflake +pygments==2.18.0 + # via + # flytekit + # rich +pyjwt[crypto]==2.9.0 # via # msal # pyjwt -pytest==7.4.3 +pytest==8.3.3 # via -r dev-requirements.in -python-dateutil==2.8.2 +python-dateutil==2.9.0.post0 # via - # arrow # botocore # croniter - # kubernetes # pandas python-json-logger==2.0.7 # via flytekit -python-slugify==8.0.1 - # via cookiecutter pytimeparse==1.1.8 # via flytekit -pytz==2023.3.post1 +pytz==2024.2 # via # croniter # pandas -pyyaml==6.0.1 +pyyaml==6.0.2 # via - # cookiecutter + # dask # flytekit - # kubernetes # pre-commit -requests==2.31.0 +requests==2.32.3 # via # azure-core # azure-datalake-store - # cookiecutter # docker # flytekit # gcsfs # google-api-core # google-cloud-storage - # kubernetes # msal # requests-oauthlib -requests-oauthlib==1.3.1 - # via - # google-auth-oauthlib - # kubernetes -rich==13.7.0 +requests-oauthlib==2.0.0 + # via google-auth-oauthlib +rich==13.9.1 # via - # cookiecutter # flytekit # rich-click -rich-click==1.7.2 +rich-click==1.8.3 # via flytekit rsa==4.9 # via google-auth -s3fs==2023.9.2 +s3fs==2024.9.0 # via flytekit -secretstorage==3.3.3 - # via keyring +scipy==1.14.1 + # via xgboost six==1.16.0 # via # azure-core # isodate - # kubernetes # python-dateutil -statsd==3.3.0 +statsd==4.0.1 # via flytekit -text-unidecode==1.3 - # via python-slugify -tomli==2.0.1 +toolz==0.12.1 # via - # autoflake - # black - # flake8-black - # mypy - # pytest -types-python-dateutil==2.8.19.14 - # via arrow -typing-extensions==4.8.0 + # dask + # partd +typing-extensions==4.12.2 # via # azure-core + # azure-identity # azure-storage-blob - # black # flytekit # mashumaro # mypy @@ -359,25 +350,21 @@ typing-extensions==4.8.0 # typing-inspect typing-inspect==0.9.0 # via dataclasses-json -urllib3==1.26.18 +tzdata==2024.2 + # via pandas +urllib3==2.2.3 # via # botocore # docker # flytekit - # kubernetes # requests -virtualenv==20.25.0 +virtualenv==20.26.6 # via pre-commit -websocket-client==1.7.0 - # via - # docker - # kubernetes wrapt==1.16.0 # via aiobotocore -yarl==1.9.4 +xgboost==2.1.1 + # via -r dev-requirements.in +yarl==1.13.1 # via aiohttp -zipp==3.17.0 +zipp==3.20.2 # via importlib-metadata - -# The following packages are considered to be unsafe in a requirements file: -# setuptools diff --git a/docs-requirements.in b/docs-requirements.in index 04c8ceb24..fb2cbba01 100644 --- a/docs-requirements.in +++ b/docs-requirements.in @@ -1,7 +1,7 @@ flytekit -fsspec<2023.10.0 +pydata-sphinx-theme +fsspec mashumaro -git+https://github.com/flyteorg/furo@main pillow psycopg2-binary flytekitplugins-deck-standard @@ -9,7 +9,6 @@ flytekitplugins-kfpytorch flytekitplugins-sqlalchemy jupytext sphinx -sphinx_rtd_theme sphinx-gallery sphinx-prompt sphinx-code-include @@ -27,6 +26,6 @@ grpcio grpcio-status myst-nb ipython!=8.7.0 -sphinx-tags==0.2.1 +sphinx-tags scikit-learn torch diff --git a/docs-requirements.txt b/docs-requirements.txt index 517adf4fd..8df2d7a24 100644 --- a/docs-requirements.txt +++ b/docs-requirements.txt @@ -1,88 +1,80 @@ # -# This file is autogenerated by pip-compile with Python 3.10 +# This file is autogenerated by pip-compile with Python 3.12 # by the following command: # 
-# /Library/Developer/CommandLineTools/usr/bin/make docs-requirements.txt +# pip-compile docs-requirements.in # -adlfs==2023.10.0 +accessible-pygments==0.0.5 + # via pydata-sphinx-theme +adlfs==2024.7.0 # via flytekit -aiobotocore==2.5.4 +aiobotocore==2.15.1 # via s3fs -aiohttp==3.9.1 +aiohappyeyeballs==2.4.3 + # via aiohttp +aiohttp==3.10.8 # via # adlfs # aiobotocore # gcsfs # s3fs -aioitertools==0.11.0 +aioitertools==0.12.0 # via aiobotocore aiosignal==1.3.1 # via aiohttp -alabaster==0.7.15 +alabaster==1.0.0 # via sphinx -annotated-types==0.6.0 - # via pydantic -anyio==4.2.0 - # via azure-core -appnope==0.1.3 +appnope==0.1.4 # via ipykernel -arrow==1.3.0 - # via cookiecutter -astroid==3.0.2 +astroid==3.3.4 # via # -r docs-requirements.in # sphinx-autoapi asttokens==2.4.1 # via stack-data -async-timeout==4.0.3 - # via aiohttp -attrs==23.2.0 +attrs==24.2.0 # via # aiohttp + # jsonlines # jsonschema # jupyter-cache # referencing - # visions -azure-core==1.29.6 +azure-core==1.31.0 # via # adlfs # azure-identity # azure-storage-blob azure-datalake-store==0.0.53 # via adlfs -azure-identity==1.15.0 +azure-identity==1.18.0 # via adlfs -azure-storage-blob==12.19.0 +azure-storage-blob==12.23.1 # via adlfs -babel==2.14.0 - # via sphinx -beautifulsoup4==4.12.2 +babel==2.16.0 # via - # furo + # pydata-sphinx-theme + # sphinx +beautifulsoup4==4.12.3 + # via + # pydata-sphinx-theme # sphinx-code-include -binaryornot==0.4.4 - # via cookiecutter -botocore==1.31.17 +botocore==1.35.23 # via aiobotocore -cachetools==5.3.2 +cachetools==5.5.0 # via google-auth -certifi==2023.11.17 +certifi==2024.8.30 # via # kubernetes # requests -cffi==1.16.0 + # sphinx-prompt +cffi==1.17.1 # via # azure-datalake-store # cryptography -cfgv==3.4.0 - # via pre-commit -chardet==5.2.0 - # via binaryornot charset-normalizer==3.3.2 # via requests click==8.1.7 # via - # cookiecutter # flytekit # jupyter-cache # rich-click @@ -90,29 +82,19 @@ cloudpickle==3.0.0 # via # flytekit # flytekitplugins-kfpytorch -comm==0.2.1 - # via - # ipykernel - # ipywidgets -contourpy==1.2.0 - # via matplotlib -cookiecutter==2.5.0 - # via flytekit -croniter==2.0.1 +comm==0.2.2 + # via ipykernel +croniter==3.0.3 # via flytekit -cryptography==41.0.7 +cryptography==43.0.1 # via # azure-identity # azure-storage-blob # msal # pyjwt -cycler==0.12.1 - # via matplotlib -dacite==1.8.1 - # via ydata-profiling dataclasses-json==0.5.9 # via flytekit -debugpy==1.8.0 +debugpy==1.8.6 # via ipykernel decorator==5.1.1 # via @@ -120,54 +102,47 @@ decorator==5.1.1 # ipython diskcache==5.6.3 # via flytekit -distlib==0.3.8 - # via virtualenv -docker==6.1.3 +docker==7.1.0 # via flytekit -docstring-parser==0.15 +docstring-parser==0.16 # via flytekit -docutils==0.17.1 +docutils==0.21.2 # via # myst-parser + # pydata-sphinx-theme # sphinx # sphinx-panels - # sphinx-rtd-theme + # sphinx-prompt # sphinx-tabs -exceptiongroup==1.2.0 - # via - # anyio - # ipython -executing==2.0.1 +durationpy==0.9 + # via kubernetes +executing==2.1.0 # via stack-data -fastjsonschema==2.19.1 +fastjsonschema==2.20.0 # via nbformat -filelock==3.13.1 - # via - # torch - # virtualenv -flyteidl==1.10.6 +filelock==3.16.1 + # via torch +flyteidl==1.13.4 # via # flytekit # flytekitplugins-kfpytorch -flytekit==1.10.2 +flytekit==1.13.7 # via # -r docs-requirements.in # flytekitplugins-deck-standard # flytekitplugins-kfpytorch # flytekitplugins-sqlalchemy -flytekitplugins-deck-standard==1.10.2 +flytekitplugins-deck-standard==1.13.7 # via -r docs-requirements.in -flytekitplugins-kfpytorch==1.10.2 
+flytekitplugins-kfpytorch==1.13.7 # via -r docs-requirements.in -flytekitplugins-sqlalchemy==1.10.2 +flytekitplugins-sqlalchemy==1.13.7 # via -r docs-requirements.in -fonttools==4.47.0 - # via matplotlib frozenlist==1.4.1 # via # aiohttp # aiosignal -fsspec==2023.9.2 +fsspec==2024.9.0 # via # -r docs-requirements.in # adlfs @@ -175,15 +150,13 @@ fsspec==2023.9.2 # gcsfs # s3fs # torch -furo @ git+https://github.com/flyteorg/furo@main - # via -r docs-requirements.in -gcsfs==2023.9.2 +gcsfs==2024.9.0.post1 # via flytekit -google-api-core==2.15.0 +google-api-core==2.20.0 # via # google-cloud-core # google-cloud-storage -google-auth==2.26.1 +google-auth==2.35.0 # via # gcsfs # google-api-core @@ -191,127 +164,111 @@ google-auth==2.26.1 # google-cloud-core # google-cloud-storage # kubernetes -google-auth-oauthlib==1.2.0 +google-auth-oauthlib==1.2.1 # via gcsfs google-cloud-core==2.4.1 # via google-cloud-storage -google-cloud-storage==2.14.0 +google-cloud-storage==2.18.2 # via gcsfs -google-crc32c==1.5.0 +google-crc32c==1.6.0 # via # google-cloud-storage # google-resumable-media -google-resumable-media==2.7.0 +google-resumable-media==2.7.2 # via google-cloud-storage -googleapis-common-protos==1.62.0 +googleapis-common-protos==1.65.0 # via # flyteidl # flytekit # google-api-core # grpcio-status -grpcio==1.60.0 + # protoc-gen-openapiv2 +grpcio==1.66.2 # via # -r docs-requirements.in # flytekit # grpcio-status -grpcio-status==1.60.0 +grpcio-status==1.66.2 # via # -r docs-requirements.in # flytekit -htmlmin==0.1.12 - # via ydata-profiling -identify==2.5.33 - # via pre-commit -idna==3.6 +idna==3.10 # via - # anyio # requests + # sphinx-prompt # yarl -imagehash==4.3.1 - # via - # visions - # ydata-profiling imagesize==1.4.1 # via sphinx -importlib-metadata==7.0.1 +importlib-metadata==8.5.0 # via # flytekit # jupyter-cache - # keyring # myst-nb -ipykernel==6.28.0 +ipykernel==6.29.5 # via myst-nb -ipython==8.20.0 +ipython==8.28.0 # via # -r docs-requirements.in # ipykernel - # ipywidgets # myst-nb -ipywidgets==8.1.1 - # via flytekitplugins-deck-standard isodate==0.6.1 # via azure-storage-blob -jaraco-classes==3.3.0 +jaraco-classes==3.4.0 + # via keyring +jaraco-context==6.0.1 + # via keyring +jaraco-functools==4.1.0 # via keyring jedi==0.19.1 # via ipython -jinja2==3.0.3 +jinja2==3.1.4 # via - # cookiecutter # myst-parser # sphinx # sphinx-autoapi - # sphinx-tabs # torch - # ydata-profiling jmespath==1.0.1 # via botocore -joblib==1.3.2 +joblib==1.4.2 # via # flytekit - # phik # scikit-learn -jsonpickle==3.0.2 +jsonlines==4.0.0 + # via flytekit +jsonpickle==3.3.0 # via flytekit -jsonschema==4.20.0 +jsonschema==4.23.0 # via nbformat jsonschema-specifications==2023.12.1 # via jsonschema -jupyter-cache==0.6.1 +jupyter-cache==1.0.0 # via myst-nb -jupyter-client==8.6.0 +jupyter-client==8.6.3 # via # ipykernel # nbclient -jupyter-core==5.7.1 +jupyter-core==5.7.2 # via # ipykernel # jupyter-client # nbclient # nbformat -jupyterlab-widgets==3.0.9 - # via ipywidgets -jupytext==1.16.0 +jupytext==1.16.4 # via -r docs-requirements.in -keyring==24.3.0 +keyring==25.4.1 # via flytekit -kiwisolver==1.4.5 - # via matplotlib -kubernetes==29.0.0 - # via flytekit -llvmlite==0.41.1 - # via numba -markdown==3.5.1 - # via flytekitplugins-deck-standard -markdown-it-py==2.2.0 +kubernetes==31.0.0 + # via flytekitplugins-kfpytorch +markdown-it-py==3.0.0 # via + # flytekit # jupytext # mdit-py-plugins # myst-parser # rich -markupsafe==2.1.3 +markupsafe==2.1.5 # via jinja2 -marshmallow==3.20.1 +marshmallow==3.22.0 # via # 
dataclasses-json # marshmallow-enum @@ -322,233 +279,170 @@ marshmallow-enum==1.5.1 # flytekit marshmallow-jsonschema==0.13.0 # via flytekit -mashumaro==3.11 +mashumaro==3.13.1 # via # -r docs-requirements.in # flytekit -matplotlib==3.8.2 - # via - # phik - # seaborn - # wordcloud - # ydata-profiling -matplotlib-inline==0.1.6 +matplotlib-inline==0.1.7 # via # ipykernel # ipython -mdit-py-plugins==0.3.5 +mdit-py-plugins==0.4.2 # via # jupytext # myst-parser mdurl==0.1.2 # via markdown-it-py -more-itertools==10.2.0 - # via jaraco-classes +more-itertools==10.5.0 + # via + # jaraco-classes + # jaraco-functools mpmath==1.3.0 # via sympy -msal==1.26.0 +msal==1.31.0 # via # azure-datalake-store # azure-identity # msal-extensions -msal-extensions==1.1.0 +msal-extensions==1.2.0 # via azure-identity -multidict==6.0.4 +multidict==6.1.0 # via # aiohttp # yarl -multimethod==1.10 - # via - # visions - # ydata-profiling mypy-extensions==1.0.0 # via typing-inspect -myst-nb==0.17.2 +myst-nb==1.1.2 # via -r docs-requirements.in -myst-parser==0.18.1 +myst-parser==4.0.0 # via myst-nb -nbclient==0.7.4 +nbclient==0.10.0 # via # jupyter-cache # myst-nb -nbformat==5.9.2 +nbformat==5.10.4 # via # jupyter-cache # jupytext # myst-nb # nbclient -nest-asyncio==1.5.8 +nest-asyncio==1.6.0 # via ipykernel -networkx==3.2.1 +networkx==3.3 + # via torch +numpy==2.1.1 # via - # torch - # visions -nodeenv==1.8.0 - # via pre-commit -numba==0.58.1 - # via ydata-profiling -numpy==1.25.2 - # via - # contourpy - # flytekit - # imagehash - # matplotlib - # numba # pandas - # patsy - # phik - # pyarrow - # pywavelets # scikit-learn # scipy - # seaborn - # statsmodels - # visions - # wordcloud - # ydata-profiling oauthlib==3.2.2 # via # kubernetes # requests-oauthlib -packaging==23.2 +packaging==24.1 # via - # docker # ipykernel # jupytext # marshmallow - # matplotlib - # msal-extensions - # plotly + # pydata-sphinx-theme # sphinx - # statsmodels -pandas==1.5.3 - # via - # flytekit - # phik - # seaborn - # statsmodels - # visions - # ydata-profiling -parso==0.8.3 +pandas==2.2.3 + # via flytekitplugins-sqlalchemy +parso==0.8.4 # via jedi -patsy==0.5.6 - # via statsmodels pexpect==4.9.0 # via ipython -phik==0.12.3 - # via ydata-profiling -pillow==10.2.0 +pillow==10.4.0 # via # -r docs-requirements.in - # imagehash - # matplotlib # sphinx-gallery - # visions - # wordcloud -platformdirs==4.1.0 - # via - # jupyter-core - # virtualenv -plotly==5.18.0 - # via flytekitplugins-deck-standard -portalocker==2.8.2 +platformdirs==4.3.6 + # via jupyter-core +portalocker==2.10.1 # via msal-extensions -pre-commit==3.6.0 - # via sphinx-tags -prompt-toolkit==3.0.43 +prompt-toolkit==3.0.48 # via ipython -protobuf==4.24.4 +proto-plus==1.24.0 + # via google-api-core +protobuf==5.28.2 # via # flyteidl # flytekit # google-api-core # googleapis-common-protos # grpcio-status - # protoc-gen-swagger -protoc-gen-swagger==0.1.0 + # proto-plus + # protoc-gen-openapiv2 +protoc-gen-openapiv2==0.0.1 # via flyteidl -psutil==5.9.7 +psutil==6.0.0 # via ipykernel psycopg2-binary==2.9.9 # via -r docs-requirements.in ptyprocess==0.7.0 # via pexpect -pure-eval==0.2.2 +pure-eval==0.2.3 # via stack-data -pyarrow==14.0.2 - # via flytekit -pyasn1==0.5.1 +pyasn1==0.6.1 # via # pyasn1-modules # rsa -pyasn1-modules==0.3.0 +pyasn1-modules==0.4.1 # via google-auth -pycparser==2.21 +pycparser==2.22 # via cffi -pydantic==2.5.3 - # via ydata-profiling -pydantic-core==2.14.6 - # via pydantic -pygments==2.17.2 +pydata-sphinx-theme==0.15.4 + # via -r docs-requirements.in +pygments==2.18.0 
# via - # furo + # accessible-pygments + # flytekit # ipython + # pydata-sphinx-theme # rich # sphinx # sphinx-prompt # sphinx-tabs -pyjwt[crypto]==2.8.0 +pyjwt[crypto]==2.9.0 # via # msal # pyjwt -pyparsing==3.1.1 - # via matplotlib -python-dateutil==2.8.2 +python-dateutil==2.9.0.post0 # via - # arrow # botocore # croniter # jupyter-client # kubernetes - # matplotlib # pandas python-json-logger==2.0.7 # via flytekit -python-slugify==8.0.1 - # via cookiecutter pytimeparse==1.1.8 # via flytekit -pytz==2023.3.post1 +pytz==2024.2 # via # croniter # pandas -pywavelets==1.5.0 - # via imagehash -pyyaml==6.0.1 +pyyaml==6.0.2 # via - # cookiecutter # flytekit # jupyter-cache # jupytext # kubernetes # myst-nb # myst-parser - # pre-commit # sphinx-autoapi - # ydata-profiling -pyzmq==25.1.2 +pyzmq==26.2.0 # via # ipykernel # jupyter-client -referencing==0.32.1 +referencing==0.35.1 # via # jsonschema # jsonschema-specifications -requests==2.31.0 +requests==2.32.3 # via # azure-core # azure-datalake-store - # cookiecutter # docker # flytekit # gcsfs @@ -559,61 +453,48 @@ requests==2.31.0 # requests-oauthlib # sphinx # sphinxcontrib-youtube - # ydata-profiling -requests-oauthlib==1.3.1 +requests-oauthlib==2.0.0 # via # google-auth-oauthlib # kubernetes -rich==13.7.0 +rich==13.9.1 # via - # cookiecutter # flytekit # rich-click -rich-click==1.7.3 +rich-click==1.8.3 # via flytekit -rpds-py==0.16.2 +rpds-py==0.20.0 # via # jsonschema # referencing rsa==4.9 # via google-auth -s3fs==2023.9.2 +s3fs==2024.9.0 # via flytekit -scikit-learn==1.3.2 +scikit-learn==1.5.2 # via -r docs-requirements.in -scipy==1.11.4 - # via - # imagehash - # phik - # scikit-learn - # statsmodels - # ydata-profiling -seaborn==0.12.2 - # via ydata-profiling +scipy==1.14.1 + # via scikit-learn six==1.16.0 # via # asttokens # azure-core # isodate # kubernetes - # patsy # python-dateutil # sphinx-code-include # sphinxext-remoteliteralinclude -sniffio==1.3.0 - # via anyio snowballstemmer==2.2.0 # via sphinx -soupsieve==2.5 +soupsieve==2.6 # via beautifulsoup4 -sphinx==4.5.0 +sphinx==8.0.2 # via # -r docs-requirements.in - # furo # myst-nb # myst-parser + # pydata-sphinx-theme # sphinx-autoapi - # sphinx-basic-ng # sphinx-code-include # sphinx-copybutton # sphinx-fontawesome @@ -621,154 +502,112 @@ sphinx==4.5.0 # sphinx-panels # sphinx-prompt # sphinx-reredirects - # sphinx-rtd-theme # sphinx-tabs # sphinx-tags - # sphinxcontrib-jquery # sphinxcontrib-youtube # sphinxext-remoteliteralinclude -sphinx-autoapi==2.0.1 +sphinx-autoapi==3.3.2 # via -r docs-requirements.in -sphinx-basic-ng==1.0.0b2 - # via furo -sphinx-code-include==1.1.1 +sphinx-code-include==1.4.0 # via -r docs-requirements.in sphinx-copybutton==0.5.2 # via -r docs-requirements.in sphinx-fontawesome==0.0.6 # via -r docs-requirements.in -sphinx-gallery==0.15.0 +sphinx-gallery==0.17.1 # via -r docs-requirements.in -sphinx-panels==0.6.0 +sphinx-panels==0.4.1 # via -r docs-requirements.in -sphinx-prompt==1.5.0 +sphinx-prompt==1.9.0 # via -r docs-requirements.in -sphinx-reredirects==0.1.3 +sphinx-reredirects==0.1.5 # via -r docs-requirements.in -sphinx-rtd-theme==1.3.0 +sphinx-tabs==3.4.5 # via -r docs-requirements.in -sphinx-tabs==3.4.0 +sphinx-tags==0.4 # via -r docs-requirements.in -sphinx-tags==0.2.1 - # via -r docs-requirements.in -sphinxcontrib-applehelp==1.0.4 +sphinxcontrib-applehelp==2.0.0 # via sphinx -sphinxcontrib-devhelp==1.0.2 +sphinxcontrib-devhelp==2.0.0 # via sphinx -sphinxcontrib-htmlhelp==2.0.1 +sphinxcontrib-htmlhelp==2.1.0 # via sphinx -sphinxcontrib-jquery==4.1 
- # via sphinx-rtd-theme sphinxcontrib-jsmath==1.0.1 # via sphinx sphinxcontrib-mermaid==0.9.2 # via -r docs-requirements.in -sphinxcontrib-qthelp==1.0.3 +sphinxcontrib-qthelp==2.0.0 # via sphinx -sphinxcontrib-serializinghtml==1.1.5 +sphinxcontrib-serializinghtml==2.0.0 # via sphinx -sphinxcontrib-youtube==1.3.0 +sphinxcontrib-youtube==1.4.1 # via -r docs-requirements.in sphinxext-remoteliteralinclude==0.4.0 # via -r docs-requirements.in -sqlalchemy==2.0.25 +sqlalchemy==2.0.35 # via # flytekitplugins-sqlalchemy # jupyter-cache stack-data==0.6.3 # via ipython -statsd==3.3.0 +statsd==4.0.1 # via flytekit -statsmodels==0.14.1 - # via ydata-profiling -sympy==1.12 +sympy==1.13.3 # via torch tabulate==0.9.0 # via jupyter-cache -tangled-up-in-unicode==0.2.0 - # via visions -tenacity==8.2.3 - # via plotly -text-unidecode==1.3 - # via python-slugify -threadpoolctl==3.2.0 +threadpoolctl==3.5.0 # via scikit-learn -toml==0.10.2 - # via jupytext -torch==2.1.2 +torch==2.4.1 # via -r docs-requirements.in -tornado==6.4 +tornado==6.4.1 # via # ipykernel # jupyter-client -tqdm==4.66.1 - # via ydata-profiling -traitlets==5.14.1 +traitlets==5.14.3 # via # comm # ipykernel # ipython - # ipywidgets # jupyter-client # jupyter-core # matplotlib-inline # nbclient # nbformat -typeguard==4.1.5 - # via ydata-profiling -types-python-dateutil==2.8.19.20240106 - # via arrow -typing-extensions==4.9.0 - # via - # anyio - # astroid +typing-extensions==4.12.2 + # via # azure-core + # azure-identity # azure-storage-blob # flytekit # mashumaro # myst-nb - # myst-parser - # pydantic - # pydantic-core + # pydata-sphinx-theme # rich-click # sqlalchemy # torch - # typeguard # typing-inspect typing-inspect==0.9.0 # via dataclasses-json -unidecode==1.3.7 - # via sphinx-autoapi -urllib3==1.26.18 +tzdata==2024.2 + # via pandas +urllib3==2.2.3 # via # botocore # docker # flytekit # kubernetes # requests -virtualenv==20.25.0 - # via pre-commit -visions[type-image-path]==0.7.5 - # via - # visions - # ydata-profiling + # sphinx-prompt wcwidth==0.2.13 # via prompt-toolkit -websocket-client==1.7.0 - # via - # docker - # kubernetes -widgetsnbextension==4.0.9 - # via ipywidgets -wordcloud==1.9.3 - # via ydata-profiling +websocket-client==1.8.0 + # via kubernetes wrapt==1.16.0 # via aiobotocore -yarl==1.9.4 +yarl==1.13.1 # via aiohttp -ydata-profiling==4.6.4 - # via flytekitplugins-deck-standard -zipp==3.17.0 +zipp==3.20.2 # via importlib-metadata # The following packages are considered to be unsafe in a requirements file: diff --git a/docs/conf.py b/docs/conf.py index d8ce10fca..9853316bc 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -97,7 +97,12 @@ ] include_patterns = [ + "index.md", + "_tags/tagsindex", + "*.md", + "**/*.md", "auto_examples/**/index.md", + "auto_examples/**/*.md", ] # The master toctree document. @@ -121,7 +126,7 @@ # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. # -html_theme = "furo" +html_theme = "pydata_sphinx_theme" html_title = "Flyte" announcement = """ @@ -129,6 +134,7 @@ Please visit the new documentation here. """ +""" html_theme_options = { "light_css_variables": { "color-brand-primary": "#4300c9", @@ -148,6 +154,7 @@ "docs_path": "docs", # path to documentation source "announcement": announcement, } +""" # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. 
They are copied after the builtin static files, @@ -183,9 +190,6 @@ "extending", "productionizing", "testing", - "bigquery_plugin", - "databricks_plugin", - "snowflake_plugin", ] # intersphinx configuration diff --git a/docs/contribute.md b/docs/contribute.md deleted file mode 100644 index 2bd45bff3..000000000 --- a/docs/contribute.md +++ /dev/null @@ -1,359 +0,0 @@ -# Contributing to User Guide, Tutorials and Integrations - -```{eval-rst} -.. tags:: Contribute, Basic -``` - -The examples documentation provides an easy way for the community to learn about the rich set of -features that Flyte offers, and we are constantly improving them with your help! - -Whether you're a novice or experienced software engineer, data scientist, or machine learning -practitioner, all contributions are welcome! - -## How to contribute - -The Flyte documentation examples guides are broken up into three types: - -1. {ref}`User Guides `: These are short, simple guides that demonstrate how to use a particular Flyte feature. The Markdown files live in the [flyte repository](https://github.com/flyteorg/flyte/tree/master/docs/user_guide), while the example code lives in flytesnacks. - These examples should be runnable locally. **Note:** The comments in the user guide Python files **will not** be rendered as user-facing docs. To update the user-facing documentation, open a pull request in the flyte repo. -2. {ref}`Tutorials `: These are longer, more advanced guides that use multiple Flyte features to solve - real-world problems. Tutorials are generally more complex examples that may require extra setup or that can only run - on larger clusters. -3. {ref}`Integrations `: These examples showcase how to use the Flyte plugins that integrate with the - broader data and ML ecosystem. - -The first step to contributing an example is to open up a -[documentation issue](https://github.com/flyteorg/flyte/issues/new?assignees=&labels=documentation%2Cuntriaged&template=docs_issue.yaml&title=%5BDocs%5D+) -to articulate the kind of example you want to write. The Flyte maintainers will guide and help you figure out where your example would fit best. - -## Creating an example - -:::{admonition} Prerequisites -Follow the {ref}`env_setup` guide to get your development environment ready. -::: - -The `flytesnacks` repo examples live in the `examples` directory, where each -subdirectory contains a self-contained example project that covers a particular -feature, integration, or use case. - -```{code-block} bash -examples -├── README.md -├── airflow_plugin -├── athena_plugin -├── aws_batch_plugin -├── basics -├── bigquery_plugin -... -``` - -### Adding an example script to an existing project - -If you're adding a new example to an existing project, you can simply create a -new `.py` file in the appropriate directory. For example, if you want to add a new -example in the `examples/basics` project, simply do: - -```{prompt} bash -touch examples/basics/my_new_example.py -``` - -If you are creating a new user guide example, you can reference the code in the user guide documentation using the `rli` (remoteliteralinclude) directive. - -If you are creating a new integration or tutorial example, add the example to the `README.md` file of the -example project as an entry in the `auto-examples-toc` directive: - -````{code-block} -```{auto-examples-toc} -... 
-my_new_example -``` -```` - -### Creating a new example project - -````{important} -If you're creating a new example in the User Guide, Tutorials, or Integrations -that doesn't fit into any of the existing subdirectories, you'll need to setup a -new example project. - -In the `flytesnacks` root directory, create one with: - -```{prompt} bash -./scripts/create-example-project new_example_project -``` - -This will create a new directory under `examples`: - -```{code-block} bash -examples/new_example_project -├── Dockerfile -├── README.md -├── new_example_project -│   ├── __init__.py -│   └── example.py -├── requirements.in -└── requirements.txt -``` - -```` - -### Creating python examples - -#### User guide examples - -If you are writing a user guide example, write your example Python script in regular Python, with regular comments. These comments **will not** be extracted from the Python file and turned into user-facing documentation. To update user-facing user guide documentation, edit the user guide files in the [flyte repository](https://github.com/flyteorg/flyte/tree/master/docs/user_guide). You can use the `rli` ([remoteliteralinclude](https://github.com/wpilibsuite/sphinxext-remoteliteralinclude/blob/main/README.md)) directive to include snippets of code from your example Python file. - -#### Tutorial or integration examples - -If you are writing a tutorial or integration example, write your example Python script in [percent format](https://jupytext.readthedocs.io/en/latest/formats.html#the-percent-format), -which allows you to interleave python code and markdown in the same file. Each -code cell should be delimited by `# %%`, and each markdown cell should be -delimited with `# %% [markdown]`. - -```{code-block} python -# %% -print("Hello World!") - -# %% [markdown] -# This is a markdown cell - -# %% -print("This is another code cell") -``` - -Markdown cells have access to sphinx directives through the -[myst markdown](https://myst-parser.readthedocs.io/en/latest/) format, -which is a flavor of markdown that makes it easier to write documentation while -giving you the utilities of sphinx. `flytesnacks` uses the -[myst-nb](https://myst-nb.readthedocs.io/en/latest/) and -[jupytext](https://github.com/mwouts/jupytext) packages to interpret the -python files as rst-compatible files. - -### Writing examples: explain what the code does - -Following the [literate programming](https://en.wikipedia.org/wiki/Literate_programming) paradigm, make sure to -interleave explanations in the `*.py` files containing the code example. - -:::{admonition} A Simple Example -:class: tip - -Here's a code snippet that defines a function that takes two positional arguments and one keyword argument: - -```python -def function(x, y, z=3): - return x + y * z -``` - -As you can see, `function` adds the two first arguments and multiplies the sum with the third keyword -argument. Can you think of a better name for this `function`? -::: - -Explanations don't have to be this detailed for such a simple example, but you can imagine how this makes for a better -reading experience for more complicated examples. - -### Creating examples in other formats - -Writing examples in `.py` files is preferred since they are easily tested and -packaged, but `flytesnacks` also supports examples written in `.ipynb` and -`.md` files in myst markdown format. This is useful in the following cases: - -- `.ipynb`: When a `.py` example needs a companion jupyter notebook as a task, e.g. 
- to illustrate the use of {py:class}`~flytekitplugins.papermill.NotebookTask`s, - or when an example is intended to be run from a notebook. -- `.md`: When a piece of documentation doesn't require testable or packaged - flyte tasks/workflows, an example page can be written as a myst markdown file. - -**Note:** If you want to add Markdown files to a user guide example project, add them to the [flyte repository](https://github.com/flyteorg/flyte/tree/master/docs/user_guide) instead. - -## Writing a README - -The `README.md` file needs to capture the _what_, _why_, and _how_ of the example. - -- What is the integration about? Its features, etc. -- Why do we need this integration? How is it going to benefit the Flyte users? -- Showcase the uniqueness of the integration -- How to install the plugin? - -Finally, **for tutorials and integrations only**, write a `auto-examples-toc` directive at the bottom of the file: - -````{code-block} -```{auto-examples-toc} -example_01 -example_02 -example_03 -``` -```` - -Where `example_01`, `example_02`, and `example_03` are the python module -names of the examples under the `new_example_project` directory. These can also -be the names of the `.ipynb` or `.md` files (but without the file extension). - -:::{tip} -Refer to any subdirectory in the `examples` directory -::: - -## Test your code - -If the example code can be run locally, just use `python .py` to run it. - -### Testing on a cluster - -Install {doc}`flytectl `, the commandline interface for flyte. - -:::{note} -Learn more about installation and configuration of Flytectl [here](https://docs.flyte.org/en/latest/flytectl/docs_index.html). -::: - -Start a Flyte demo cluster with: - -``` -flytectl demo start -``` - -### Testing the `basics` project examples on a local demo cluster - -In this example, we'll build the `basics` project: - -```{prompt} bash -# from flytesnacks root directory -cd examples/basics -``` - -Build the container: - -```{prompt} bash -docker build . --tag "basics:v1" -f Dockerfile -``` - -Package the examples by running: - -```{prompt} bash -pyflyte --pkgs basics package --image basics:v1 -f -``` - -Register the examples by running - -```{prompt} bash -flytectl register files \ - -p flytesnacks \ - -d development \ - --archive flyte-package.tgz \ - --version v1 -``` - -Visit `https://localhost:30081/console` to view the Flyte console, which consists -of the examples present in the `flytesnacks/core` directory. - -### Updating dependencies - -:::{admonition} Prerequisites -Install [pip-tools](https://pypi.org/project/pip-tools/) in your development -environment with: - -```{prompt} bash -pip install pip-tools -``` - -::: - -If you've updated the dependencies of the project, update the `requirements.txt` -file by running: - -```{prompt} bash -pip-compile requirements.in --upgrade --verbose --resolver=backtracking -``` - -### Rebuild the image - -If you've updated the source code or dependencies of the project, and rebuild -the image with: - -```{prompt} bash -docker build . --tag "basics:v2" -f core/Dockerfile -pyflyte --pkgs basics package --image basics:v2 -f -flytectl register files \ - -p flytesnacks \ - -d development \ - --archive flyte-package.tgz \ - --version v2 -``` - -Refer to {ref}`this guide ` -if the code in itself is updated and requirements.txt is the same. - -## Pre-commit hooks - -We use [pre-commit](https://pre-commit.com/) to automate linting and code formatting on every commit. 
-Configured hooks include [ruff](https://github.com/astral-sh/ruff) to ensure newlines are added to the end of files, and there is proper spacing in files. - -We run all those hooks in CI, but if you want to run them locally on every commit, run `pre-commit install` after -installing the dev environment requirements. In case you want to disable `pre-commit` hooks locally, run -`pre-commit uninstall`. More info [here](https://pre-commit.com/). - -### Formatting - -We use [ruff](https://github.com/astral-sh/ruff) to autoformat code. They -are configured as git hooks in `pre-commit`. Run `make fmt` to format your code. - -### Spell-checking - -We use [codespell](https://github.com/codespell-project/codespell) to catch common misspellings. Run -`make spellcheck` to spell-check the changes. - -## Update Documentation Pages - -The `docs/conf.py` contains the sphinx configuration for building the -`flytesnacks` documentation. - -At build-time, the `flytesnacks` sphinx build system will convert all of the -projects in the `examples` directory into `docs/auto_examples`, and will be -available in the documentation. - -::::{important} - -The docs build system will convert the `README.md` files in each example -project into a `index.md` file, so you can reference the root page of each -example project, e.g., in myst markdown format, you can write a table-of-content -directive like so: - -:::{code-block} - -```{toc} -auto_examples/basics/index -``` - -::: - -:::: - -If you've created a new example project, you'll need to add the `index` page -in the table of contents in `docs/index.md` to make sure the project -shows up in the documentation. Additonally, you'll need to update the appropriate -`list-table` directive in `docs/userguide.md`, `docs/tutorials.md`, or -`docs/integrations.md` so that it shows up in the respective section of the -documentation. - -## Build the documentation locally - -Verify that the code and documentation look as expected: - -- Learn about the documentation tools [here](https://docs.flyte.org/en/latest/community/contribute.html#documentation) -- Install the requirements by running `pip install -r docs-requirements.txt`. -- Run `make -C docs html` - - ```{tip} - To run a fresh build, run `make -C docs clean html`. - ``` - -- Open the HTML pages present in the `docs/_build` directory in the browser with - `open docs/_build/index.html` - -## Create a pull request - -Create the pull request, then ensure that the docs are rendered correctly by clicking on the documentation check. - -```{image} https://raw.githubusercontent.com/flyteorg/static-resources/main/common/test_docs_link.png -:alt: Docs link in a PR -``` - -You can refer to [this PR](https://github.com/flyteorg/flytesnacks/pull/332) for the exact changes required. diff --git a/docs/index.md b/docs/index.md index a67bfb480..335236f8f 100644 --- a/docs/index.md +++ b/docs/index.md @@ -12,144 +12,14 @@ next-page-title: Quickstart guide (getting_started_index)= -# Introduction to Flyte +# Flytesnacks -Flyte is a workflow orchestrator that unifies machine learning, data engineering, and data analytics stacks for building robust and reliable applications. 
Flyte features: - -- Reproducible, repeatable workflows -- Strongly typed interfaces -- Structured datasets to enable easy conversion of dataframes between types, and column-level type checking -- Easy movement of data between local and cloud storage -- Easy tracking of data lineages -- Built-in data and artifact visualization - -For a full list of feature, see the [Flyte features page](https://flyte.org/features). - -## Basic Flyte components - -Flyte is made up of a user plane, control plane, and data plane. - -- The **user plane** contains the elements you need to develop the code that will implement your application's directed acyclic graph (DAG). These elements are FlyteKit and Flytectl. Data scientists and machine learning engineers primarily work in the user plane. -- The **control plane** is part of the Flyte backend that is configured by platform engineers or others tasked with setting up computing infrastructure. It consists of FlyteConsole and FlyteAdmin, which serves as the main Flyte API to process requests from clients in the user plane. The control plane sends workflow execution requests to the data plane for execution, and stores information such as current and past running workflows, and provides that information upon request. -- The **data plane** is another part of the Flyte backend that contains FlytePropeller, the core engine of Flyte that executes workflows. FlytePropeller is designed as a [Kubernetes Controller](https://kubernetes.io/docs/concepts/architecture/controller/). The data plane sends status events back to the control plane so that information can be stored and surfaced to end users. - -## Next steps - -- To quickly try out Flyte on your machine, follow the {ref}`Quickstart guide `. -- To create a Flyte project that can be used to package workflow code for deployment to a Flyte cluster, see {ref}`"Getting started with workflow development" `. -- To set up a Flyte cluster, see the [Deployment documentation](https://docs.flyte.org/en/latest/deployment/index.html). 
- -```{toctree} -:maxdepth: 1 -:hidden: - -Getting Started -User Guide -Tutorials -Concepts -Deployment -API Reference -Community -``` - -```{toctree} -:maxdepth: -1 -:caption: Getting Started -:hidden: - -Introduction to Flyte -Quickstart guide -Getting started with workflow development -Flyte fundamentals -Core use cases -``` - -```{toctree} -:maxdepth: -1 -:caption: User Guide -:hidden: - -📖 User Guide -🌳 Environment Setup -🔤 Basics -⌨️ Data Types and IO -🔮 Advanced Composition -🧩 Customizing Dependencies -🏡 Development Lifecycle -⚗️ Testing -🚢 Productionizing -🏗 Extending -📝 Contributing -``` - -```{toctree} -:maxdepth: -1 -:caption: Tutorials -:hidden: - -Tutorials -Model Training -feature_engineering -bioinformatics_examples -flyte_lab -``` - -```{toctree} -:maxdepth: -1 -:caption: Integrations -:hidden: - -Integrations -auto_examples/airflow_agent/index -auto_examples/airflow_plugin/index -auto_examples/athena_plugin/index -auto_examples/aws_batch_plugin/index -auto_examples/bigquery_agent/index -auto_examples/chatgpt_agent/index -auto_examples/k8s_dask_plugin/index -auto_examples/databricks_agent/index -auto_examples/dbt_plugin/index -auto_examples/dolt_plugin/index -auto_examples/duckdb_plugin/index -auto_examples/flyteinteractive_plugin/index -auto_examples/greatexpectations_plugin/index -auto_examples/hive_plugin/index -auto_examples/k8s_pod_plugin/index -auto_examples/mlflow_plugin/index -auto_examples/mmcloud_agent/index -auto_examples/modin_plugin/index -auto_examples/kfmpi_plugin/index -auto_examples/nim_plugin/index -auto_examples/onnx_plugin/index -auto_examples/openai_batch_agent/index -auto_examples/papermill_plugin/index -auto_examples/pandera_plugin/index -auto_examples/kfpytorch_plugin/index -auto_examples/ray_plugin/index -auto_examples/sagemaker_inference_agent/index -auto_examples/sensor/index -auto_examples/snowflake_agent/index -auto_examples/k8s_spark_plugin/index -auto_examples/sql_plugin/index -auto_examples/kftensorflow_plugin/index -auto_examples/whylogs_plugin/index -``` - -```{toctree} -:maxdepth: -1 -:caption: Deprecated integrations -:hidden: - -Deprecated integrations -BigQuery plugin -Databricks plugin -Snowflake plugin -``` +Welcome to Flytesnacks. ```{toctree} :maxdepth: -1 -:caption: Tags :hidden: -_tags/tagsindex +Tutorials +Integrations ``` diff --git a/docs/integrations.md b/docs/integrations.md deleted file mode 100644 index f4ff3e08f..000000000 --- a/docs/integrations.md +++ /dev/null @@ -1,190 +0,0 @@ -(integrations)= - -# Integrations - -Flyte is designed to be highly extensible and can be customized in multiple ways. - -```{note} -Want to contribute an example? Check out the {doc}`Example Contribution Guide `. -``` - -## Flytekit Plugins - -Flytekit plugins are simple plugins that can be implemented purely in python, unit tested locally and allow extending -Flytekit functionality. These plugins can be anything and for comparison can be thought of like -[Airflow Operators](https://airflow.apache.org/docs/apache-airflow/stable/howto/operator/index.html). - -```{list-table} -:header-rows: 0 -:widths: 20 30 - -* - {doc}`SQL ` - - Execute SQL queries as tasks. -* - {doc}`Great Expectations ` - - Validate data with `great_expectations`. -* - {doc}`Papermill ` - - Execute Jupyter Notebooks with `papermill`. -* - {doc}`Pandera ` - - Validate pandas dataframes with `pandera`. -* - {doc}`Modin ` - - Scale pandas workflows with `modin`. -* - {doc}`Dolt ` - - Version your SQL database with `dolt`. 
-* - {doc}`DBT ` - - Run and test your `dbt` pipelines in Flyte. -* - {doc}`WhyLogs ` - - `whylogs`: the open standard for data logging. -* - {doc}`MLFlow ` - - `mlflow`: the open standard for model tracking. -* - {doc}`ONNX ` - - Convert ML models to ONNX models seamlessly. -* - {doc}`DuckDB ` - - Run analytical queries using DuckDB. -* - {doc}`Weights and Biases ` - - `wandb`: Machine learning platform to build better models faster. -* - {doc}`NIM ` - - Serve optimized model containers with NIM. -``` - -:::{dropdown} {fa}`info-circle` Using flytekit plugins -:animate: fade-in-slide-down - -Data is automatically marshalled and unmarshalled in and out of the plugin. Users should mostly implement the -{py:class}`~flytekit.core.base_task.PythonTask` API defined in Flytekit. - -Flytekit Plugins are lazily loaded and can be released independently like libraries. We follow a convention to name the -plugin like `flytekitplugins-*`, where `*` indicates the package to be integrated into Flytekit. For example -`flytekitplugins-papermill` enables users to author Flytekit tasks using [Papermill](https://papermill.readthedocs.io/en/latest/). - -You can find the plugins maintained by the core Flyte team [here](https://github.com/flyteorg/flytekit/tree/master/plugins). -::: - -## Native Backend Plugins - -Native Backend Plugins are the plugins that can be executed without any external service dependencies because the compute is -orchestrated by Flyte itself, within its provisioned Kubernetes clusters. - -```{list-table} -:header-rows: 0 -:widths: 20 30 - -* - {doc}`K8s Pods ` - - Execute K8s pods for arbitrary workloads. -* - {doc}`K8s Cluster Dask Jobs ` - - Run Dask jobs on a K8s Cluster. -* - {doc}`K8s Cluster Spark Jobs ` - - Run Spark jobs on a K8s Cluster. -* - {doc}`Kubeflow PyTorch ` - - Run distributed PyTorch training jobs using `Kubeflow`. -* - {doc}`Kubeflow TensorFlow ` - - Run distributed TensorFlow training jobs using `Kubeflow`. -* - {doc}`MPI Operator ` - - Run distributed deep learning training jobs using Horovod and MPI. -* - {doc}`Ray Task ` - - Run Ray jobs on a K8s Cluster. -``` - -(flyte_agents)= - -## Flyte agents - -[Flyte agents](https://docs.flyte.org/en/latest/flyte_agents/index.html) are long-running, stateless services that receive execution requests via gRPC and initiate jobs with appropriate external or internal services. Each agent service is a Kubernetes deployment that receives gRPC requests from FlytePropeller when users trigger a particular type of task. (For example, the BigQuery agent handles BigQuery tasks.) The agent service then initiates a job with the appropriate service. If you don't see the agent you need below, see "[Developing agents](https://docs.flyte.org/en/latest/flyte_agents/developing_agents.html)" to learn how to develop a new agent. - -```{list-table} -:header-rows: 0 -:widths: 20 30 - -* - {doc}`Airflow agent ` - - Run Airflow jobs in your workflows with the Airflow agent. -* - {doc}`BigQuery agent ` - - Run BigQuery jobs in your workflows with the BigQuery agent. -* - {doc}`ChatGPT agent ` - - Run ChatGPT jobs in your workflows with the ChatGPT agent. -* - {doc}`Databricks ` - - Run Databricks jobs in your workflows with the Databricks agent. -* - {doc}`Memory Machine Cloud ` - - Execute tasks using the MemVerge Memory Machine Cloud agent. -* - {doc}`OpenAI Batch ` - - Submit requests for asynchronous batch processing on OpenAI. -* - {doc}`SageMaker Inference ` - - Deploy models and create, as well as trigger inference endpoints on SageMaker. 
-* - {doc}`Sensor ` - - Run sensor jobs in your workflows with the sensor agent. -* - {doc}`Snowflake ` - - Run Snowflake jobs in your workflows with the Snowflake agent. -``` - -(external_service_backend_plugins)= - -## External Service Backend Plugins - -As the term suggests, external service backend plugins rely on external services like -[Hive](https://docs.qubole.com/en/latest/user-guide/engines/hive/index.html) for handling the workload defined in the Flyte task that uses the respective plugin. - -```{list-table} -:header-rows: 0 -:widths: 20 30 - -* - {doc}`AWS Athena plugin ` - - Execute queries using AWS Athena -* - {doc}`AWS Batch plugin ` - - Running tasks and workflows on AWS batch service -* - {doc}`Flyte Interactive ` - - Execute tasks using Flyte Interactive to debug. -* - {doc}`Hive plugin ` - - Run Hive jobs in your workflows. -``` - -(enable-backend-plugins)= - -::::{dropdown} {fa}`info-circle` Enabling Backend Plugins -:animate: fade-in-slide-down - -To enable a backend plugin you have to add the `ID` of the plugin to the enabled plugins list. The `enabled-plugins` is available under the `tasks > task-plugins` section of FlytePropeller's configuration. -The plugin configuration structure is defined [here](https://pkg.go.dev/github.com/flyteorg/flytepropeller@v0.6.1/pkg/controller/nodes/task/config#TaskPluginConfig). An example of the config follows, - -```yaml -tasks: - task-plugins: - enabled-plugins: - - container - - sidecar - - k8s-array - default-for-task-types: - container: container - sidecar: sidecar - container_array: k8s-array -``` - -**Finding the `ID` of the Backend Plugin** - -This is a little tricky since you have to look at the source code of the plugin to figure out the `ID`. In the case of Spark, for example, the value of `ID` is used [here](https://github.com/flyteorg/flyteplugins/blob/v0.5.25/go/tasks/plugins/k8s/spark/spark.go#L424) here, defined as [spark](https://github.com/flyteorg/flyteplugins/blob/v0.5.25/go/tasks/plugins/k8s/spark/spark.go#L41). - -:::: - -## SDKs for Writing Tasks and Workflows - -The {ref}`community ` would love to help you with your own ideas of building a new SDK. Currently the available SDKs are: - -```{list-table} -:header-rows: 0 -:widths: 20 30 - -* - [flytekit](https://flytekit.readthedocs.io) - - The Python SDK for Flyte. -* - [flytekit-java](https://github.com/spotify/flytekit-java) - - The Java/Scala SDK for Flyte. -``` - -## Flyte Operators - -Flyte can be integrated with other orchestrators to help you leverage Flyte's -constructs natively within other orchestration tools. - -```{list-table} -:header-rows: 0 -:widths: 20 30 - -* - {doc}`Airflow ` - - Trigger Flyte executions from Airflow. -``` diff --git a/docs/integrations/index.md b/docs/integrations/index.md new file mode 100644 index 000000000..c89d0e29d --- /dev/null +++ b/docs/integrations/index.md @@ -0,0 +1,287 @@ +(integrations)= + +# Integrations + +Flyte is designed to be highly extensible and can be customized in multiple ways. + +```{note} +Want to contribute an integration example? Check out the {ref}`Tutorials and integration examples contribution guide `. +``` + +## Flytekit plugins + +Flytekit plugins can be implemented purely in Python, unit tested locally, and allow extending +Flytekit functionality. For comparison, these plugins can be thought of like +[Airflow operators](https://airflow.apache.org/docs/apache-airflow/stable/howto/operator/index.html). 
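+
+For instance, here is a minimal sketch of authoring a task with one of these plugins, assuming the `flytekitplugins-papermill` package is installed (the task name, notebook path, and input/output names below are hypothetical):
+
+```python
+from flytekit import kwtypes, workflow
+from flytekitplugins.papermill import NotebookTask
+
+# Wrap a Jupyter notebook as a Flyte task: papermill injects the declared
+# inputs into the notebook and extracts the declared outputs from it.
+nb_task = NotebookTask(
+    name="simple_notebook_task",
+    notebook_path="notebooks/analysis.ipynb",  # hypothetical path
+    inputs=kwtypes(n=int),
+    outputs=kwtypes(result=float),
+)
+
+@workflow
+def wf(n: int = 10) -> float:
+    # Plugin tasks compose with regular tasks and workflows.
+    return nb_task(n=n).result
+```
+
+Each plugin below follows the same pattern: install its `flytekitplugins-*` package and use the task or type classes it provides directly in your workflows.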
+ +```{list-table} +:header-rows: 0 +:widths: 20 30 + +* - {doc}`Comet ` + - `comet-ml`: Comet’s machine learning platform. +* - {doc}`DBT ` + - Run and test your `dbt` pipelines in Flyte. +* - {doc}`Dolt ` + - Version your SQL database with `dolt`. +* - {doc}`DuckDB ` + - Run analytical queries using DuckDB. +* - {doc}`Great Expectations ` + - Validate data with `great_expectations`. +* - {doc}`MLFlow ` + - `mlflow`: the open standard for model tracking. +* - {doc}`Modin ` + - Scale pandas workflows with `modin`. +* - {doc}`Neptune ` + - `neptune`: Neptune is the MLOps stack component for experiment tracking. +* - {doc}`NIM ` + - Serve optimized model containers with NIM. +* - {doc}`Ollama ` + - Serve fine-tuned LLMs with Ollama in a Flyte workflow. +* - {doc}`ONNX ` + - Convert ML models to ONNX models seamlessly. +* - {doc}`Pandera ` + - Validate pandas dataframes with `pandera`. +* - {doc}`Papermill ` + - Execute Jupyter Notebooks with `papermill`. +* - {doc}`SQL ` + - Execute SQL queries as tasks. +* - {doc}`Weights and Biases ` + - `wandb`: Machine learning platform to build better models faster. +* - {doc}`WhyLogs ` + - `whylogs`: the open standard for data logging. +``` + +:::{dropdown} {fa}`info-circle` Using Flytekit plugins +:animate: fade-in-slide-down + +Data is automatically marshalled and unmarshalled in and out of the plugin. Users should mostly implement the {py:class}`~flytekit.core.base_task.PythonTask` API defined in Flytekit. + +Flytekit plugins are lazily loaded and can be released independently like libraries. The naming convention is `flytekitplugins-*`, where `*` indicates the package to be integrated into Flytekit. For example, `flytekitplugins-papermill` enables users to author Flytekit tasks using [Papermill](https://papermill.readthedocs.io/en/latest/). + +You can find the plugins maintained by the core Flyte team [here](https://github.com/flyteorg/flytekit/tree/master/plugins). +::: + +## Native backend plugins + +Native backend plugins can be executed without any external service dependencies because the compute is orchestrated by Flyte itself, within its provisioned Kubernetes clusters. + +```{list-table} +:header-rows: 0 +:widths: 20 30 + +* - {doc}`Kubeflow PyTorch ` + - Run distributed PyTorch training jobs using `Kubeflow`. +* - {doc}`Kubeflow TensorFlow ` + - Run distributed TensorFlow training jobs using `Kubeflow`. +* - {doc}`Kubernetes pods ` + - Execute Kubernetes pods for arbitrary workloads. +* - {doc}`Kubernetes cluster Dask jobs ` + - Run Dask jobs on a Kubernetes Cluster. +* - {doc}`Kubernetes cluster Spark jobs ` + - Run Spark jobs on a Kubernetes Cluster. +* - {doc}`MPI Operator ` + - Run distributed deep learning training jobs using Horovod and MPI. +* - {doc}`Ray ` + - Run Ray jobs on a K8s Cluster. +``` + +(flyte_agents)= + +## Flyte agents + +[Flyte agents](https://docs.flyte.org/en/latest/flyte_agents/index.html) are long-running, stateless services that receive execution requests via gRPC and initiate jobs with appropriate external or internal services. Each agent service is a Kubernetes deployment that receives gRPC requests from FlytePropeller when users trigger a particular type of task. (For example, the BigQuery agent handles BigQuery tasks.) The agent service then initiates a job with the appropriate service. If you don't see the agent you need below, see "[Developing agents](https://docs.flyte.org/en/latest/flyte_agents/developing_agents.html)" to learn how to develop a new agent. 
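+
+From the task author's point of view, an agent-backed task is declared like any other Flytekit task type. A minimal sketch for the BigQuery agent (assuming `flytekitplugins-bigquery` is installed; the project ID and query are placeholders):
+
+```python
+from flytekit import workflow
+from flytekitplugins.bigquery import BigQueryConfig, BigQueryTask
+
+# FlytePropeller sends this task to the BigQuery agent instead of spinning up a container
+bigquery_task = BigQueryTask(
+    name="sql.bigquery.example",
+    inputs={},
+    query_template="SELECT 1;",
+    task_config=BigQueryConfig(ProjectID="my-gcp-project"),
+)
+
+
+@workflow
+def bigquery_wf():
+    bigquery_task()
+```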
+
+```{list-table}
+:header-rows: 0
+:widths: 20 30
+
+* - {doc}`AWS SageMaker Inference agent `
+  - Deploy models and create and trigger inference endpoints on AWS SageMaker.
+* - {doc}`Airflow agent `
+  - Run Airflow jobs in your workflows with the Airflow agent.
+* - {doc}`BigQuery agent `
+  - Run BigQuery jobs in your workflows with the BigQuery agent.
+* - {doc}`ChatGPT agent `
+  - Run ChatGPT jobs in your workflows with the ChatGPT agent.
+* - {doc}`Databricks agent `
+  - Run Databricks jobs in your workflows with the Databricks agent.
+* - {doc}`Memory Machine Cloud agent `
+  - Execute tasks using the MemVerge Memory Machine Cloud agent.
+* - {doc}`OpenAI Batch `
+  - Submit requests for asynchronous batch processing on OpenAI.
+* - {doc}`PERIAN Job Platform agent `
+  - Execute tasks on the PERIAN Job Platform.
+* - {doc}`Sensor agent `
+  - Run sensor jobs in your workflows with the sensor agent.
+* - {doc}`Snowflake agent `
+  - Run Snowflake jobs in your workflows with the Snowflake agent.
+```
+
+(external_service_backend_plugins)=
+
+## External service backend plugins
+
+As the term suggests, these plugins rely on external services to handle the workload defined in the Flyte task that uses the plugin.
+
+```{list-table}
+:header-rows: 0
+:widths: 20 30
+
+* - {doc}`AWS Athena `
+  - Execute queries using AWS Athena.
+* - {doc}`AWS Batch `
+  - Run tasks and workflows on the AWS Batch service.
+* - {doc}`Flyte Interactive `
+  - Execute tasks using Flyte Interactive to debug.
+* - {doc}`Hive `
+  - Run Hive jobs in your workflows.
+```
+
+(enable-backend-plugins)=
+
+::::{dropdown} {fa}`info-circle` Enabling backend plugins
+:animate: fade-in-slide-down
+
+To enable a backend plugin, you must add the `ID` of the plugin to the enabled plugins list. The `enabled-plugins` list is available under the `tasks > task-plugins` section of FlytePropeller's configuration.
+The plugin configuration structure is defined [here](https://pkg.go.dev/github.com/flyteorg/flytepropeller@v0.6.1/pkg/controller/nodes/task/config#TaskPluginConfig). An example of the config follows:
+
+```yaml
+tasks:
+  task-plugins:
+    enabled-plugins:
+      - container
+      - sidecar
+      - k8s-array
+    default-for-task-types:
+      container: container
+      sidecar: sidecar
+      container_array: k8s-array
+```
+
+**Finding the `ID` of the backend plugin**
+
+To find the `ID` of the backend plugin, look at the source code of the plugin. For example, in the case of Spark, the value of `ID` is used [here](https://github.com/flyteorg/flyteplugins/blob/v0.5.25/go/tasks/plugins/k8s/spark/spark.go#L424), defined as [spark](https://github.com/flyteorg/flyteplugins/blob/v0.5.25/go/tasks/plugins/k8s/spark/spark.go#L41).
+
+::::
+
+## SDKs for writing tasks and workflows
+
+The {ref}`community ` would love to help you build new SDKs. Currently, the available SDKs are:
+
+```{list-table}
+:header-rows: 0
+:widths: 20 30
+
+* - [flytekit](https://github.com/flyteorg/flytekit)
+  - The Python SDK for Flyte.
+* - [flytekit-java](https://github.com/flyteorg/flytekit-java)
+  - The Java/Scala SDK for Flyte.
+```
+
+## Flyte operators
+
+Flyte can be integrated with other orchestrators to help you leverage Flyte's
+constructs natively within other orchestration tools.
+
+```{list-table}
+:header-rows: 0
+:widths: 20 30
+
+* - {doc}`Airflow `
+  - Trigger Flyte executions from Airflow.
+```
+
+```{toctree}
+:maxdepth: -1
+:hidden:
+:caption: Flytekit plugins
+
+Comet
+DBT
+Dolt
+DuckDB
+Great Expectations
+MLFlow
+Modin
+Neptune
+NIM
+Ollama
+ONNX
+Pandera
+Papermill
+SQL
+Weights & Biases
+WhyLogs
+```
+
+```{toctree}
+:maxdepth: -1
+:hidden:
+:caption: Native backend plugins
+
+Kubeflow PyTorch
+Kubeflow TensorFlow
+Kubernetes cluster Dask jobs
+Kubernetes cluster Spark jobs
+MPI Operator
+Ray
+```
+
+```{toctree}
+:maxdepth: -1
+:hidden:
+:caption: Flyte agents
+
+Airflow agent
+AWS Sagemaker inference agent
+BigQuery agent
+ChatGPT agent
+Databricks agent
+Memory Machine Cloud agent
+OpenAI batch agent
+PERIAN Job Platform agent
+Sensor agent
+Snowflake agent
+```
+
+```{toctree}
+:maxdepth: -1
+:hidden:
+:caption: External service backend plugins
+
+AWS Athena
+AWS Batch
+Flyte Interactive
+Hive
+
+```
+
+```{toctree}
+:maxdepth: -1
+:hidden:
+:caption: SDKs for writing tasks and workflows
+
+flytekit
+flytekit-java
+
+```
+
+```{toctree}
+:maxdepth: -1
+:hidden:
+:caption: Flyte operators
+
+Airflow
+```
+
+```{toctree}
+:maxdepth: -1
+:hidden:
+:caption: Deprecated integrations
+
+BigQuery plugin
+Databricks plugin
+Kubernetes pods
+Snowflake plugin
+```
diff --git a/docs/bioinformatics_examples.md b/docs/tutorials/bioinformatics/index.md
similarity index 81%
rename from docs/bioinformatics_examples.md
rename to docs/tutorials/bioinformatics/index.md
index 0e3bd5508..9207682c4 100644
--- a/docs/bioinformatics_examples.md
+++ b/docs/tutorials/bioinformatics/index.md
@@ -10,7 +10,7 @@ Flyte very much supports running your bioinformatics applications. Dive deeper i
 :header-rows: 0
 :widths: 20 30
 
-* - {doc}`Nucleotide Sequence Querying with BLASTX `
+* - {doc}`Nucleotide Sequence Querying with BLASTX `
   - Use BLASTX to Query a Nucleotide Sequence Against a Local Protein Database
 ```
 
@@ -19,5 +19,5 @@
 :caption: Contents
 :hidden:
 
-auto_examples/blast/index
+/auto_examples/blast/index
 ```
diff --git a/docs/feature_engineering.md b/docs/tutorials/feature_engineering/index.md
similarity index 62%
rename from docs/feature_engineering.md
rename to docs/tutorials/feature_engineering/index.md
index b34a73891..99ac9f39b 100644
--- a/docs/feature_engineering.md
+++ b/docs/tutorials/feature_engineering/index.md
@@ -1,4 +1,4 @@
-# Feature Engineering
+# Feature engineering
 
 **Feature Engineering** is an essential part of Machine Learning. It's the process
 of transforming raw data into features that better represent the underlying problem
 
@@ -10,9 +10,9 @@ Explore how features can be engineered with the power of Flyte.
 :header-rows: 0
 :widths: 20 30
 
-* - {doc}`EDA and Feature Engineering With Papermill `
+* - {doc}`EDA and Feature Engineering With Papermill `
   - How to use Jupyter notebook within Flyte
-* - {doc}`Data Cleaning and Feature Serving With Feast `
+* - {doc}`Data Cleaning and Feature Serving With Feast `
   - How to use Feast to serve data in Flyte
 ```
 
@@ -22,6 +22,6 @@ Explore how features can be engineered with the power of Flyte.
 :caption: Contents
 :hidden:
 
-auto_examples/exploratory_data_analysis/index
-auto_examples/feast_integration/index
+/auto_examples/exploratory_data_analysis/index
+/auto_examples/feast_integration/index
 ```
diff --git a/docs/flyte_lab.md b/docs/tutorials/flytelab/index.md
similarity index 100%
rename from docs/flyte_lab.md
rename to docs/tutorials/flytelab/index.md
diff --git a/docs/weather_forecasting.md b/docs/tutorials/flytelab/weather_forecasting.md
similarity index 100%
rename from docs/weather_forecasting.md
rename to docs/tutorials/flytelab/weather_forecasting.md
diff --git a/docs/tutorials.md b/docs/tutorials/index.md
similarity index 62%
rename from docs/tutorials.md
rename to docs/tutorials/index.md
index 559a71554..dc7b3b369 100644
--- a/docs/tutorials.md
+++ b/docs/tutorials/index.md
@@ -1,8 +1,3 @@
----
-next-page: ml_training
-next-page-title: Model Training
----
-
 (tutorials)=
 
 # Tutorials
@@ -17,7 +12,7 @@ contributing samples easy. If this is your first time running these examples, fo
 {ref}`setup guide ` to get started.
 
 ```{note}
-Want to contribute an example? Check out the {doc}`Example Contribution Guide `.
+Want to contribute a tutorial? Check out the {ref}`Tutorials and integration examples contribution guide `.
 ```
 
 ## 🤖 Model Training
 
 Train machine learning models using your framework of choice.
 
 ```{list-table}
 :header-rows: 0
 :widths: 20 30
 
-* - {doc}`Diabetes Classification `
+* - {doc}`Diabetes Classification `
   - Train an XGBoost model on the Pima Indians Diabetes Dataset.
-* - {doc}`House Price Regression `
+* - {doc}`House Price Regression `
   - Use dynamic workflows to train a multiregion house price prediction model using XGBoost.
-* - {doc}`MNIST Classification `
+* - {doc}`MNIST Classification `
   - Train a neural network on MNIST with PyTorch and W&B
-* - {doc}`NLP Processing with Gensim `
+* - {doc}`NLP Processing with Gensim `
   - Word embedding and topic modelling on lee background corpus with Gensim
-* - {doc}`Sales Forecasting `
+* - {doc}`Sales Forecasting `
   - Use the Rossmann Store data to forecast sales with distributed training using Horovod on Spark.
 ```
 
 ## 🛠 Feature Engineering
 
 Engineer the data features to improve your model accuracy.
 
 ```{list-table}
 :header-rows: 0
 :widths: 20 30
 
-* - {doc}`EDA and Feature Engineering With Papermill `
+* - {doc}`EDA and Feature Engineering With Papermill `
   - How to use Jupyter notebook within Flyte
-* - {doc}`Data Cleaning and Feature Serving With Feast `
+* - {doc}`Data Cleaning and Feature Serving With Feast `
   - How to use Feast to serve data in Flyte
 ```
 
 ## 🧪 Bioinformatics
 
 Perform computational biology with Flyte.
 
 ```{list-table}
 :header-rows: 0
 :widths: 20 30
 
-* - {doc}`Nucleotide Sequence Querying with BLASTX `
+* - {doc}`Nucleotide Sequence Querying with BLASTX `
   - Use BLASTX to Query a Nucleotide Sequence Against a Local Protein Database
 ```
 
 ## 🔬 Flytelab
 
 The open-source repository of machine learning projects using Flyte.
 
 ```{list-table}
 :header-rows: 0
 :widths: 20 30
 
-* - {doc}`Weather Forecasting `
+* - {doc}`Weather Forecasting `
   - Build an online weather forecasting application.
``` + +```{toctree} +:maxdepth: -1 +:hidden: + +Model Training +Feature engineering +Bioinformatics +Flytelab +``` diff --git a/docs/ml_training.md b/docs/tutorials/model_training/index.md similarity index 50% rename from docs/ml_training.md rename to docs/tutorials/model_training/index.md index cd909b2ec..f46ad18c3 100644 --- a/docs/ml_training.md +++ b/docs/tutorials/model_training/index.md @@ -1,4 +1,4 @@ -# Model Training +# Model training Understand how machine learning models can be trained from within Flyte, with an added advantage of orchestration benefits. @@ -6,15 +6,15 @@ Understand how machine learning models can be trained from within Flyte, with an :header-rows: 0 :widths: 20 30 -* - {doc}`Diabetes Classification ` +* - {doc}`Diabetes Classification ` - Train an XGBoost model on the Pima Indians Diabetes Dataset. -* - {doc}`House Price Regression ` +* - {doc}`House Price Regression ` - Use dynamic workflows to train a multiregion house price prediction model using XGBoost. -* - {doc}`MNIST Classification ` +* - {doc}`MNIST Classification ` - Train a neural network on MNIST with PyTorch and W&B -* - {doc}`NLP Processing with Gensim ` +* - {doc}`NLP Processing with Gensim ` - Word embedding and topic modelling on lee background corpus with Gensim -* - {doc}`Forecast Sales Using Rossmann Store Sales ` +* - {doc}`Forecast Sales Using Rossmann Store Sales ` - Forecast sales data with data-parallel distributed training using Horovod on Spark. ``` @@ -23,9 +23,9 @@ Understand how machine learning models can be trained from within Flyte, with an :caption: Contents :hidden: -auto_examples/pima_diabetes/index -auto_examples/house_price_prediction/index -auto_examples/mnist_classifier/index -auto_examples/nlp_processing/index -auto_examples/forecasting_sales/index +/auto_examples/pima_diabetes/index +/auto_examples/house_price_prediction/index +/auto_examples/mnist_classifier/index +/auto_examples/nlp_processing/index +/auto_examples/forecasting_sales/index ``` diff --git a/examples/advanced_composition/advanced_composition/conditional.py b/examples/advanced_composition/advanced_composition/conditional.py index 7c314162b..f29c0770a 100644 --- a/examples/advanced_composition/advanced_composition/conditional.py +++ b/examples/advanced_composition/advanced_composition/conditional.py @@ -79,7 +79,10 @@ def shape_properties_accept_conditional_output(radius: float) -> float: if __name__ == "__main__": - print(f"Circumference of circle x Area of circle (radius={radius_small}): {shape_properties(radius=5.0)}") + radius_small = 0.5 + print( + f"Circumference of circle (radius={radius_small}) x Area of circle (radius={calculate_circle_circumference(radius=radius_small)}): {shape_properties_accept_conditional_output(radius=radius_small)}" + ) # Using the output of a previous task in a conditional @@ -213,8 +216,10 @@ def noop_in_conditional(radius: float, seed: int = 5) -> float: if __name__ == "__main__": default_seed_output = consume_task_output(radius=0.4) print( - f"Executing consume_task_output(0.4) with default seed=5. Expected output: calculate_circle_circumference => {default_seed_output}" + f"Executing consume_task_output(0.4) with default seed=5. Expected output: calculate_circle_area => {default_seed_output}" ) custom_seed_output = consume_task_output(radius=0.4, seed=7) - print(f"Executing consume_task_output(0.4, seed=7). Expected output: calculate_circle_area => {custom_seed_output}") + print( + f"Executing consume_task_output(0.4, seed=7). 
Expected output: calculate_circle_circumference => {custom_seed_output}"
+    )
diff --git a/examples/airflow_plugin/airflow_plugin/airflow.py b/examples/airflow_plugin/airflow_plugin/airflow.py
index 35a296261..d47c75277 100644
--- a/examples/airflow_plugin/airflow_plugin/airflow.py
+++ b/examples/airflow_plugin/airflow_plugin/airflow.py
@@ -66,7 +66,7 @@
 # Click `Save` in the end.
 #
 # :::{note}
-# Use external IP as the Flyte `Host`. You can {std:ref}`deploy ` Flyte on an on-prem machine or on cloud.
+# Use external IP as the Flyte `Host`. You can {ref}`deploy ` Flyte on an on-prem machine or on cloud.
 # :::
 #
 # ## Register Flyte Code
diff --git a/examples/bigquery_agent/requirements.in b/examples/bigquery_agent/requirements.in
index a987746f1..e496aea34 100644
--- a/examples/bigquery_agent/requirements.in
+++ b/examples/bigquery_agent/requirements.in
@@ -1,5 +1,6 @@
 flytekit
 wheel
 matplotlib
+pandas
 flytekitplugins-deck-standard
 flytekitplugins-bigquery
diff --git a/examples/bigquery_plugin/README.md b/examples/bigquery_plugin/README.md
index 5027330e5..46e379751 100644
--- a/examples/bigquery_plugin/README.md
+++ b/examples/bigquery_plugin/README.md
@@ -1,5 +1,14 @@
-# BigQuery plugin example
+# BigQuery plugin
 
-**Note:** This example code uses the legacy implementation of the BigQuery integration. We recommend using the [BigQuery agent](https://docs.flyte.org/en/latest/flytesnacks/examples/bigquery_agent/index.html) instead.
+```{warning}
+This example code uses the legacy implementation of the BigQuery integration. We recommend using the [BigQuery agent](https://docs.flyte.org/en/latest/flytesnacks/examples/bigquery_agent/index.html) instead.
+```
 
 This directory contains example code for the deprecated BigQuery plugin. For documentation on installing and using the plugin, see the [BigQuery plugin documentation](https://docs.flyte.org/en/latest/deprecated_integrations/bigquery_plugin/index.html)
+
+```{toctree}
+:maxdepth: -1
+:hidden:
+
+bigquery_plugin_example
+```
diff --git a/examples/bigquery_plugin/bigquery_plugin/bigquery_plugin_example.py b/examples/bigquery_plugin/bigquery_plugin/bigquery_plugin_example.py
index 000882653..3aa740edc 100644
--- a/examples/bigquery_plugin/bigquery_plugin/bigquery_plugin_example.py
+++ b/examples/bigquery_plugin/bigquery_plugin/bigquery_plugin_example.py
@@ -1,3 +1,9 @@
+# %% [markdown]
+# (bigquery_plugin_example)=
+# # BigQuery plugin example
+#
+# %%
+
 try:
     from typing import Annotated
 except ImportError:
diff --git a/examples/bigquery_plugin/requirements.in b/examples/bigquery_plugin/requirements.in
index a987746f1..e496aea34 100644
--- a/examples/bigquery_plugin/requirements.in
+++ b/examples/bigquery_plugin/requirements.in
@@ -1,5 +1,6 @@
 flytekit
 wheel
 matplotlib
+pandas
 flytekitplugins-deck-standard
 flytekitplugins-bigquery
diff --git a/examples/blast/requirements.in b/examples/blast/requirements.in
index bec48ef8c..30f834337 100644
--- a/examples/blast/requirements.in
+++ b/examples/blast/requirements.in
@@ -1,4 +1,5 @@
 flytekit>=0.32.3
 wheel
 matplotlib
+pandas
 flytekitplugins-deck-standard
diff --git a/examples/comet_ml_plugin/Dockerfile b/examples/comet_ml_plugin/Dockerfile
new file mode 100644
index 000000000..4e04f77bd
--- /dev/null
+++ b/examples/comet_ml_plugin/Dockerfile
@@ -0,0 +1,24 @@
+FROM python:3.11-slim-bookworm
+LABEL org.opencontainers.image.source https://github.com/flyteorg/flytesnacks
+
+WORKDIR /root
+ENV VENV /opt/venv
+ENV LANG C.UTF-8
+ENV LC_ALL C.UTF-8
+ENV PYTHONPATH /root
+
+# Virtual environment
+RUN python3 -m venv ${VENV}
+ENV PATH="${VENV}/bin:$PATH"
+
+# Install Python dependencies
+COPY requirements.in /root
+RUN pip install -r /root/requirements.in
+
+# Copy the actual code
+COPY . /root
+
+# This tag is supplied by the build script and will be used to determine the version
+# when registering tasks, workflows, and launch plans
+ARG tag
+ENV FLYTE_INTERNAL_IMAGE $tag
diff --git a/examples/comet_ml_plugin/README.md b/examples/comet_ml_plugin/README.md
new file mode 100644
index 000000000..40a962a52
--- /dev/null
+++ b/examples/comet_ml_plugin/README.md
@@ -0,0 +1,36 @@
+(comet_ml)=
+
+# Comet ML
+
+```{eval-rst}
+.. tags:: Integration, Data, Metrics, Intermediate
+```
+
+Comet’s machine learning platform integrates with your existing infrastructure and tools so you can manage, visualize, and optimize models from training runs to production monitoring. This plugin integrates Flyte with Comet by configuring links between the two platforms.
+
+To install the plugin, run:
+
+```bash
+pip install flytekitplugins-comet-ml
+```
+
+Comet requires an API key to authenticate with their platform. In the example below, a secret is created using
+[Flyte's Secrets manager](https://docs.flyte.org/en/latest/user_guide/productionizing/secrets.html).
+
+To enable linking from the Flyte side panel to Comet.ml, add the following to Flyte's configuration:
+
+```yaml
+plugins:
+  logs:
+    dynamic-log-links:
+      - comet-ml-execution-id:
+          displayName: Comet
+          templateUris: "{{ .taskConfig.host }}/{{ .taskConfig.workspace }}/{{ .taskConfig.project_name }}/{{ .executionName }}{{ .nodeId }}{{ .taskRetryAttempt }}{{ .taskConfig.link_suffix }}"
+      - comet-ml-custom-id:
+          displayName: Comet
+          templateUris: "{{ .taskConfig.host }}/{{ .taskConfig.workspace }}/{{ .taskConfig.project_name }}/{{ .taskConfig.experiment_key }}"
+```
+
+```{auto-examples-toc}
+comet_ml_example
+```
diff --git a/examples/comet_ml_plugin/comet_ml_plugin/__init__.py b/examples/comet_ml_plugin/comet_ml_plugin/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/examples/comet_ml_plugin/comet_ml_plugin/comet_ml_example.py b/examples/comet_ml_plugin/comet_ml_plugin/comet_ml_example.py
new file mode 100644
index 000000000..af5c92e71
--- /dev/null
+++ b/examples/comet_ml_plugin/comet_ml_plugin/comet_ml_example.py
@@ -0,0 +1,157 @@
+# %% [markdown]
+# (comet_ml_example)=
+#
+# # Comet Example
+# Comet’s machine learning platform integrates with your existing infrastructure and
+# tools so you can manage, visualize, and optimize models from training runs to
+# production monitoring. This plugin integrates Flyte with Comet by configuring
+# links between the two platforms.
+import os
+import os.path
+
+from flytekit import (
+    ImageSpec,
+    Secret,
+    current_context,
+    task,
+    workflow,
+)
+from flytekit.types.directory import FlyteDirectory
+from flytekitplugins.comet_ml import comet_ml_login
+
+# %% [markdown]
+# First, we specify the project and workspace that we will use with Comet's platform.
+# Please update `PROJECT_NAME` and `WORKSPACE` to the values associated with your account.
+# %%
+PROJECT_NAME = "flytekit-comet-ml-v1"
+WORKSPACE = "thomas-unionai"
+
+# %% [markdown]
+# Comet requires an API key to authenticate with its platform. In the example below,
+# the secret is created using
+# [Flyte's Secrets manager](https://docs.flyte.org/en/latest/user_guide/productionizing/secrets.html).
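+# One way to provision such a secret on a cluster that uses the default Kubernetes
+# secrets backend is with `kubectl` (a sketch; the namespace depends on your
+# project and domain):
+# ```shell
+# kubectl create secret generic comet-ml-group \
+#     --from-literal=comet-ml-key=<your-comet-api-key> \
+#     --namespace <project>-<domain>
+# ```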
+# %%
+secret = Secret(key="comet-ml-key", group="comet-ml-group")
+
+# %% [markdown]
+# Next, we use `ImageSpec` to construct a container that contains the dependencies for this
+# task:
+# %%
+
+REGISTRY = os.getenv("REGISTRY", "localhost:30000")
+image = ImageSpec(
+    name="comet-ml",
+    packages=[
+        "torch==2.3.1",
+        "comet-ml==3.43.2",
+        "lightning==2.3.0",
+        "flytekitplugins-comet-ml",
+        "torchvision",
+    ],
+    builder="default",
+    registry=REGISTRY,
+)
+
+
+# %% [markdown]
+# Here, we use a Flyte task to download the dataset and cache it:
+# %%
+@task(cache=True, cache_version="2", container_image=image)
+def get_dataset() -> FlyteDirectory:
+    from torchvision.datasets import MNIST
+
+    ctx = current_context()
+    dataset_dir = os.path.join(ctx.working_directory, "dataset")
+    os.makedirs(dataset_dir, exist_ok=True)
+
+    # Download training and evaluation dataset
+    MNIST(dataset_dir, train=True, download=True)
+    MNIST(dataset_dir, train=False, download=True)
+
+    return dataset_dir
+
+
+# %% [markdown]
+# The `comet_ml_login` decorator calls `comet_ml.init` and configures it to use Flyte's
+# execution id as the Comet experiment key. The body of the task is PyTorch Lightning
+# training code, where we pass `CometLogger` into the `Trainer`'s `logger`.
+# %%
+@task(
+    secret_requests=[secret],
+    container_image=image,
+)
+@comet_ml_login(
+    project_name=PROJECT_NAME,
+    workspace=WORKSPACE,
+    secret=secret,
+)
+def train_lightning(dataset: FlyteDirectory, hidden_layer_size: int):
+    import pytorch_lightning as pl
+    import torch
+    import torch.nn.functional as F
+    from pytorch_lightning import Trainer
+    from pytorch_lightning.loggers import CometLogger
+    from torch.utils.data import DataLoader
+    from torchvision import transforms
+    from torchvision.datasets import MNIST
+
+    class Model(pl.LightningModule):
+        def __init__(self, layer_size=784, hidden_layer_size=256):
+            super().__init__()
+            self.save_hyperparameters()
+            self.layers = torch.nn.Sequential(
+                torch.nn.Linear(layer_size, hidden_layer_size),
+                torch.nn.Linear(hidden_layer_size, 10),
+            )
+
+        def forward(self, x):
+            return torch.relu(self.layers(x.view(x.size(0), -1)))
+
+        def training_step(self, batch, batch_nb):
+            x, y = batch
+            loss = F.cross_entropy(self(x), y)
+            self.logger.log_metrics({"train_loss": loss}, step=batch_nb)
+            return loss
+
+        def validation_step(self, batch, batch_nb):
+            x, y = batch
+            y_hat = self.forward(x)
+            loss = F.cross_entropy(y_hat, y)
+            self.logger.log_metrics({"val_loss": loss}, step=batch_nb)
+            return loss
+
+        def configure_optimizers(self):
+            return torch.optim.Adam(self.parameters(), lr=0.02)
+
+    dataset.download()
+    train_ds = MNIST(dataset, train=True, download=False, transform=transforms.ToTensor())
+    eval_ds = MNIST(dataset, train=False, download=False, transform=transforms.ToTensor())
+    train_loader = DataLoader(train_ds, batch_size=32)
+    eval_loader = DataLoader(eval_ds, batch_size=32)
+
+    comet_logger = CometLogger()
+    comet_logger.log_hyperparams({"batch_size": 32})
+
+    model = Model(hidden_layer_size=hidden_layer_size)
+    trainer = Trainer(max_epochs=1, fast_dev_run=True, logger=comet_logger)
+    trainer.fit(model, train_loader, eval_loader)
+
+
+@workflow
+def main(hidden_layer_size: int = 32):
+    dataset = get_dataset()
+    train_lightning(dataset=dataset, hidden_layer_size=hidden_layer_size)
+
+
+# %% [markdown]
+# To enable dynamic log links, add the plugin to Flyte's configuration file:
+# ```yaml
+# plugins:
+#   logs:
+#     dynamic-log-links:
+#       - comet-ml-execution-id:
+#           displayName: Comet
+#           templateUris: "{{ 
.taskConfig.host }}/{{ .taskConfig.workspace }}/{{ .taskConfig.project_name }}/{{ .executionName }}{{ .nodeId }}{{ .taskRetryAttempt }}{{ .taskConfig.link_suffix }}" +# - comet-ml-custom-id: +# displayName: Comet +# templateUris: "{{ .taskConfig.host }}/{{ .taskConfig.workspace }}/{{ .taskConfig.project_name }}/{{ .taskConfig.experiment_key }}" +# ``` diff --git a/examples/comet_ml_plugin/requirements.in b/examples/comet_ml_plugin/requirements.in new file mode 100644 index 000000000..bfadcb009 --- /dev/null +++ b/examples/comet_ml_plugin/requirements.in @@ -0,0 +1 @@ +flytekitplugins-comet-ml diff --git a/examples/customizing_dependencies/customizing_dependencies/raw_container.py b/examples/customizing_dependencies/customizing_dependencies/raw_container.py index e8dc9a5ed..9d986da56 100644 --- a/examples/customizing_dependencies/customizing_dependencies/raw_container.py +++ b/examples/customizing_dependencies/customizing_dependencies/raw_container.py @@ -1,6 +1,7 @@ import logging from flytekit import ContainerTask, kwtypes, task, workflow +from flytekit.core.base_task import TaskMetadata logger = logging.getLogger(__file__) @@ -25,6 +26,7 @@ "{{.inputs.b}}", "/var/outputs", ], + metadata=TaskMetadata(cache=True, cache_version="1.0"), ) calculate_ellipse_area_python = ContainerTask( @@ -41,6 +43,7 @@ "{{.inputs.b}}", "/var/outputs", ], + metadata=TaskMetadata(cache=True, cache_version="1.0"), ) calculate_ellipse_area_r = ContainerTask( @@ -58,6 +61,7 @@ "{{.inputs.b}}", "/var/outputs", ], + metadata=TaskMetadata(cache=True, cache_version="1.0"), ) calculate_ellipse_area_haskell = ContainerTask( @@ -73,6 +77,7 @@ "{{.inputs.b}}", "/var/outputs", ], + metadata=TaskMetadata(cache=True, cache_version="1.0"), ) calculate_ellipse_area_julia = ContainerTask( @@ -89,6 +94,7 @@ "{{.inputs.b}}", "/var/outputs", ], + metadata=TaskMetadata(cache=True, cache_version="1.0"), ) diff --git a/examples/data_types_and_io/data_types_and_io/dataclass.py b/examples/data_types_and_io/data_types_and_io/dataclass.py index 3fac38f0d..44c73e4f9 100644 --- a/examples/data_types_and_io/data_types_and_io/dataclass.py +++ b/examples/data_types_and_io/data_types_and_io/dataclass.py @@ -7,7 +7,6 @@ from flytekit.types.directory import FlyteDirectory from flytekit.types.file import FlyteFile from flytekit.types.structured import StructuredDataset -from mashumaro.mixins.json import DataClassJSONMixin # NOTE: If you're using Flytekit version below v1.10, you'll need to decorate with `@dataclass_json` using # `from dataclass_json import dataclass_json` instead of inheriting from Mashumaro's `DataClassJSONMixin`. 
@@ -23,7 +22,7 @@
 # Python types
 # Define a `dataclass` with `int`, `str` and `dict` as the data types
 @dataclass
-class Datum(DataClassJSONMixin):
+class Datum:
     x: int
     y: str
     z: dict[int, str]
@@ -50,7 +49,7 @@ def add(x: Datum, y: Datum) -> Datum:
 
 # Flyte types
 @dataclass
-class FlyteTypes(DataClassJSONMixin):
+class FlyteTypes:
     dataframe: StructuredDataset
     file: FlyteFile
     directory: FlyteDirectory
diff --git a/examples/data_types_and_io/data_types_and_io/file.py b/examples/data_types_and_io/data_types_and_io/file.py
index ede0fd7ae..1c89c1e18 100644
--- a/examples/data_types_and_io/data_types_and_io/file.py
+++ b/examples/data_types_and_io/data_types_and_io/file.py
@@ -37,17 +37,17 @@ def normalize_columns(
         normalized_data[colname] = [(x - mean) / std for x in values]
 
     # write to local path
-    out_path = Path(flytekit.current_context().working_directory) / f"normalized-{Path(csv_url.path).stem}.csv"
-    with out_path.open(mode="w") as output_file:
+    out_path = str(Path(flytekit.current_context().working_directory) / f"normalized-{Path(csv_url.path).stem}.csv")
+    with open(out_path, mode="w") as output_file:
         writer = csv.DictWriter(output_file, fieldnames=columns_to_normalize)
         writer.writeheader()
         for row in zip(*normalized_data.values()):
             writer.writerow({k: row[i] for i, k in enumerate(columns_to_normalize)})
 
     if output_location:
-        return FlyteFile(path=out_path, remote_path=output_location)
+        return FlyteFile(path=str(out_path), remote_path=output_location)
     else:
-        return FlyteFile(path=out_path)
+        return FlyteFile(path=str(out_path))
 
 
 # Define a workflow. The `normalize_csv_files` workflow has an `output_location` argument which is passed
@@ -72,12 +72,12 @@ def normalize_csv_file(
 if __name__ == "__main__":
     default_files = [
         (
-            "https://people.sc.fsu.edu/~jburkardt/data/csv/biostats.csv",
+            "https://raw.githubusercontent.com/flyteorg/flytesnacks/refs/heads/master/examples/data_types_and_io/test_data/biostats.csv",
             ["Name", "Sex", "Age", "Heights (in)", "Weight (lbs)"],
             ["Age"],
         ),
         (
-            "https://people.sc.fsu.edu/~jburkardt/data/csv/faithful.csv",
+            "https://raw.githubusercontent.com/flyteorg/flytesnacks/refs/heads/master/examples/data_types_and_io/test_data/faithful.csv",
             ["Index", "Eruption length (mins)", "Eruption wait (mins)"],
             ["Eruption length (mins)"],
         ),
diff --git a/examples/data_types_and_io/data_types_and_io/file_streaming.py b/examples/data_types_and_io/data_types_and_io/file_streaming.py
new file mode 100644
index 000000000..73e1add2a
--- /dev/null
+++ b/examples/data_types_and_io/data_types_and_io/file_streaming.py
@@ -0,0 +1,45 @@
+import os
+
+import pandas as pd
+from flytekit import task, workflow
+from flytekit.types.directory import FlyteDirectory
+from flytekit.types.file import FlyteFile
+
+
+@task()
+def remove_some_rows(ff: FlyteFile) -> FlyteFile:
+    """
+    Remove the rows where the value of the City column is 'Seattle'.
+    This is an example with streaming support.
+ """ + new_file = FlyteFile.new_remote_file("data_without_seattle.csv") + with ff.open("r") as r: + with new_file.open("w") as w: + df = pd.read_csv(r) + df = df[df["City"] != "Seattle"] + df.to_csv(w, index=False) + return new_file + + +@task +def process_folder(fd: FlyteDirectory) -> FlyteDirectory: + out_fd = FlyteDirectory.new_remote("folder-copy") + for base, x in fd.crawl(): + src = str(os.path.join(base, x)) + out_file = out_fd.new_file(x) + with FlyteFile(src).open("rb") as f: + with out_file.open("wb") as o: + o.write(f.read()) + # The output path will be s3://my-s3-bucket/data/77/--0/folder-copy + return out_fd + + +@workflow() +def wf(): + remove_some_rows(ff=FlyteFile("s3://custom-bucket/data.csv")) + process_folder(fd=FlyteDirectory("s3://my-s3-bucket/folder")) + return + + +if __name__ == "__main__": + print(f"Running wf() {wf()}") diff --git a/examples/data_types_and_io/data_types_and_io/folder.py b/examples/data_types_and_io/data_types_and_io/folder.py index 46b0e5e5a..4b79e3233 100644 --- a/examples/data_types_and_io/data_types_and_io/folder.py +++ b/examples/data_types_and_io/data_types_and_io/folder.py @@ -93,8 +93,8 @@ def download_and_normalize_csv_files( # Run the workflow locally if __name__ == "__main__": csv_urls = [ - "https://people.sc.fsu.edu/~jburkardt/data/csv/biostats.csv", - "https://people.sc.fsu.edu/~jburkardt/data/csv/faithful.csv", + "https://raw.githubusercontent.com/flyteorg/flytesnacks/refs/heads/master/examples/data_types_and_io/test_data/biostats.csv", + "https://raw.githubusercontent.com/flyteorg/flytesnacks/refs/heads/master/examples/data_types_and_io/test_data/faithful.csv", ] columns_metadata = [ ["Name", "Sex", "Age", "Heights (in)", "Weight (lbs)"], diff --git a/examples/data_types_and_io/data_types_and_io/tensorflow_type.py b/examples/data_types_and_io/data_types_and_io/tensorflow_type.py new file mode 100644 index 000000000..3ec8aea71 --- /dev/null +++ b/examples/data_types_and_io/data_types_and_io/tensorflow_type.py @@ -0,0 +1,62 @@ +# Import necessary libraries and modules + +from flytekit import ImageSpec, task, workflow +from flytekit.types.directory import TFRecordsDirectory +from flytekit.types.file import TFRecordFile + +custom_image = ImageSpec( + packages=["tensorflow", "tensorflow-datasets", "flytekitplugins-kftensorflow"], + registry="ghcr.io/flyteorg", +) + +import tensorflow as tf + + +# TensorFlow Model +@task +def train_model() -> tf.keras.Model: + model = tf.keras.Sequential( + [tf.keras.layers.Dense(128, activation="relu"), tf.keras.layers.Dense(10, activation="softmax")] + ) + model.compile(optimizer="adam", loss="sparse_categorical_crossentropy", metrics=["accuracy"]) + return model + + +@task +def evaluate_model(model: tf.keras.Model, x: tf.Tensor, y: tf.Tensor) -> float: + loss, accuracy = model.evaluate(x, y) + return accuracy + + +@workflow +def training_workflow(x: tf.Tensor, y: tf.Tensor) -> float: + model = train_model() + return evaluate_model(model=model, x=x, y=y) + + +# TFRecord Files +@task +def process_tfrecord(file: TFRecordFile) -> int: + count = 0 + for record in tf.data.TFRecordDataset(file): + count += 1 + return count + + +@workflow +def tfrecord_workflow(file: TFRecordFile) -> int: + return process_tfrecord(file=file) + + +# TFRecord Directories +@task +def process_tfrecords_dir(dir: TFRecordsDirectory) -> int: + count = 0 + for record in tf.data.TFRecordDataset(dir.path): + count += 1 + return count + + +@workflow +def tfrecords_dir_workflow(dir: TFRecordsDirectory) -> int: + return 
process_tfrecords_dir(dir=dir) diff --git a/examples/data_types_and_io/requirements.in b/examples/data_types_and_io/requirements.in index 79bd303e5..2bcce8b12 100644 --- a/examples/data_types_and_io/requirements.in +++ b/examples/data_types_and_io/requirements.in @@ -1,4 +1,5 @@ pandas torch tabulate +tensorflow pyarrow diff --git a/examples/data_types_and_io/test_data/biostats.csv b/examples/data_types_and_io/test_data/biostats.csv new file mode 100644 index 000000000..b09efca0c --- /dev/null +++ b/examples/data_types_and_io/test_data/biostats.csv @@ -0,0 +1,19 @@ +"Name", "Sex", "Age", "Height (in)", "Weight (lbs)" +"Alex", "M", 41, 74, 170 +"Bert", "M", 42, 68, 166 +"Carl", "M", 32, 70, 155 +"Dave", "M", 39, 72, 167 +"Elly", "F", 30, 66, 124 +"Fran", "F", 33, 66, 115 +"Gwen", "F", 26, 64, 121 +"Hank", "M", 30, 71, 158 +"Ivan", "M", 53, 72, 175 +"Jake", "M", 32, 69, 143 +"Kate", "F", 47, 69, 139 +"Luke", "M", 34, 72, 163 +"Myra", "F", 23, 62, 98 +"Neil", "M", 36, 75, 160 +"Omar", "M", 38, 70, 145 +"Page", "F", 31, 67, 135 +"Quin", "M", 29, 71, 176 +"Ruth", "F", 28, 65, 131 diff --git a/examples/data_types_and_io/test_data/faithful.csv b/examples/data_types_and_io/test_data/faithful.csv new file mode 100644 index 000000000..a11030a50 --- /dev/null +++ b/examples/data_types_and_io/test_data/faithful.csv @@ -0,0 +1,273 @@ +"Index", "Eruption length (mins)","Eruption wait (mins)" + 1, 3.600, 79 + 2, 1.800, 54 + 3, 3.333, 74 + 4, 2.283, 62 + 5, 4.533, 85 + 6, 2.883, 55 + 7, 4.700, 88 + 8, 3.600, 85 + 9, 1.950, 51 + 10, 4.350, 85 + 11, 1.833, 54 + 12, 3.917, 84 + 13, 4.200, 78 + 14, 1.750, 47 + 15, 4.700, 83 + 16, 2.167, 52 + 17, 1.750, 62 + 18, 4.800, 84 + 19, 1.600, 52 + 20, 4.250, 79 + 21, 1.800, 51 + 22, 1.750, 47 + 23, 3.450, 78 + 24, 3.067, 69 + 25, 4.533, 74 + 26, 3.600, 83 + 27, 1.967, 55 + 28, 4.083, 76 + 29, 3.850, 78 + 30, 4.433, 79 + 31, 4.300, 73 + 32, 4.467, 77 + 33, 3.367, 66 + 34, 4.033, 80 + 35, 3.833, 74 + 36, 2.017, 52 + 37, 1.867, 48 + 38, 4.833, 80 + 39, 1.833, 59 + 40, 4.783, 90 + 41, 4.350, 80 + 42, 1.883, 58 + 43, 4.567, 84 + 44, 1.750, 58 + 45, 4.533, 73 + 46, 3.317, 83 + 47, 3.833, 64 + 48, 2.100, 53 + 49, 4.633, 82 + 50, 2.000, 59 + 51, 4.800, 75 + 52, 4.716, 90 + 53, 1.833, 54 + 54, 4.833, 80 + 55, 1.733, 54 + 56, 4.883, 83 + 57, 3.717, 71 + 58, 1.667, 64 + 59, 4.567, 77 + 60, 4.317, 81 + 61, 2.233, 59 + 62, 4.500, 84 + 63, 1.750, 48 + 64, 4.800, 82 + 65, 1.817, 60 + 66, 4.400, 92 + 67, 4.167, 78 + 68, 4.700, 78 + 69, 2.067, 65 + 70, 4.700, 73 + 71, 4.033, 82 + 72, 1.967, 56 + 73, 4.500, 79 + 74, 4.000, 71 + 75, 1.983, 62 + 76, 5.067, 76 + 77, 2.017, 60 + 78, 4.567, 78 + 79, 3.883, 76 + 80, 3.600, 83 + 81, 4.133, 75 + 82, 4.333, 82 + 83, 4.100, 70 + 84, 2.633, 65 + 85, 4.067, 73 + 86, 4.933, 88 + 87, 3.950, 76 + 88, 4.517, 80 + 89, 2.167, 48 + 90, 4.000, 86 + 91, 2.200, 60 + 92, 4.333, 90 + 93, 1.867, 50 + 94, 4.817, 78 + 95, 1.833, 63 + 96, 4.300, 72 + 97, 4.667, 84 + 98, 3.750, 75 + 99, 1.867, 51 +100, 4.900, 82 +101, 2.483, 62 +102, 4.367, 88 +103, 2.100, 49 +104, 4.500, 83 +105, 4.050, 81 +106, 1.867, 47 +107, 4.700, 84 +108, 1.783, 52 +109, 4.850, 86 +110, 3.683, 81 +111, 4.733, 75 +112, 2.300, 59 +113, 4.900, 89 +114, 4.417, 79 +115, 1.700, 59 +116, 4.633, 81 +117, 2.317, 50 +118, 4.600, 85 +119, 1.817, 59 +120, 4.417, 87 +121, 2.617, 53 +122, 4.067, 69 +123, 4.250, 77 +124, 1.967, 56 +125, 4.600, 88 +126, 3.767, 81 +127, 1.917, 45 +128, 4.500, 82 +129, 2.267, 55 +130, 4.650, 90 +131, 1.867, 45 +132, 4.167, 83 +133, 2.800, 56 +134, 4.333, 89 +135, 1.833, 46 
+136, 4.383, 82 +137, 1.883, 51 +138, 4.933, 86 +139, 2.033, 53 +140, 3.733, 79 +141, 4.233, 81 +142, 2.233, 60 +143, 4.533, 82 +144, 4.817, 77 +145, 4.333, 76 +146, 1.983, 59 +147, 4.633, 80 +148, 2.017, 49 +149, 5.100, 96 +150, 1.800, 53 +151, 5.033, 77 +152, 4.000, 77 +153, 2.400, 65 +154, 4.600, 81 +155, 3.567, 71 +156, 4.000, 70 +157, 4.500, 81 +158, 4.083, 93 +159, 1.800, 53 +160, 3.967, 89 +161, 2.200, 45 +162, 4.150, 86 +163, 2.000, 58 +164, 3.833, 78 +165, 3.500, 66 +166, 4.583, 76 +167, 2.367, 63 +168, 5.000, 88 +169, 1.933, 52 +170, 4.617, 93 +171, 1.917, 49 +172, 2.083, 57 +173, 4.583, 77 +174, 3.333, 68 +175, 4.167, 81 +176, 4.333, 81 +177, 4.500, 73 +178, 2.417, 50 +179, 4.000, 85 +180, 4.167, 74 +181, 1.883, 55 +182, 4.583, 77 +183, 4.250, 83 +184, 3.767, 83 +185, 2.033, 51 +186, 4.433, 78 +187, 4.083, 84 +188, 1.833, 46 +189, 4.417, 83 +190, 2.183, 55 +191, 4.800, 81 +192, 1.833, 57 +193, 4.800, 76 +194, 4.100, 84 +195, 3.966, 77 +196, 4.233, 81 +197, 3.500, 87 +198, 4.366, 77 +199, 2.250, 51 +200, 4.667, 78 +201, 2.100, 60 +202, 4.350, 82 +203, 4.133, 91 +204, 1.867, 53 +205, 4.600, 78 +206, 1.783, 46 +207, 4.367, 77 +208, 3.850, 84 +209, 1.933, 49 +210, 4.500, 83 +211, 2.383, 71 +212, 4.700, 80 +213, 1.867, 49 +214, 3.833, 75 +215, 3.417, 64 +216, 4.233, 76 +217, 2.400, 53 +218, 4.800, 94 +219, 2.000, 55 +220, 4.150, 76 +221, 1.867, 50 +222, 4.267, 82 +223, 1.750, 54 +224, 4.483, 75 +225, 4.000, 78 +226, 4.117, 79 +227, 4.083, 78 +228, 4.267, 78 +229, 3.917, 70 +230, 4.550, 79 +231, 4.083, 70 +232, 2.417, 54 +233, 4.183, 86 +234, 2.217, 50 +235, 4.450, 90 +236, 1.883, 54 +237, 1.850, 54 +238, 4.283, 77 +239, 3.950, 79 +240, 2.333, 64 +241, 4.150, 75 +242, 2.350, 47 +243, 4.933, 86 +244, 2.900, 63 +245, 4.583, 85 +246, 3.833, 82 +247, 2.083, 57 +248, 4.367, 82 +249, 2.133, 67 +250, 4.350, 74 +251, 2.200, 54 +252, 4.450, 83 +253, 3.567, 73 +254, 4.500, 73 +255, 4.150, 88 +256, 3.817, 80 +257, 3.917, 71 +258, 4.450, 83 +259, 2.000, 56 +260, 4.283, 79 +261, 4.767, 78 +262, 4.533, 84 +263, 1.850, 58 +264, 4.250, 83 +265, 1.983, 43 +266, 2.250, 60 +267, 4.750, 75 +268, 4.117, 81 +269, 2.150, 46 +270, 4.417, 90 +271, 1.817, 46 +272, 4.467, 74 diff --git a/examples/databricks_agent/README.md b/examples/databricks_agent/README.md index 6d6f4e754..0c1148926 100644 --- a/examples/databricks_agent/README.md +++ b/examples/databricks_agent/README.md @@ -1,6 +1,6 @@ (databricks_agent)= -# Databricks agent example +# Databricks agent ```{eval-rst} .. tags:: Spark, Integration, DistributedComputing, Data, Advanced diff --git a/examples/databricks_agent/databricks_agent/databricks_agent_example_usage.py b/examples/databricks_agent/databricks_agent/databricks_agent_example_usage.py index 9de652bc3..1fe9d3ff9 100644 --- a/examples/databricks_agent/databricks_agent/databricks_agent_example_usage.py +++ b/examples/databricks_agent/databricks_agent/databricks_agent_example_usage.py @@ -11,7 +11,7 @@ import flytekit from flytekit import Resources, task, workflow -from flytekitplugins.spark import Databricks +from flytekitplugins.spark import DatabricksV2 as Databricks # %% [markdown] diff --git a/examples/databricks_plugin/README.md b/examples/databricks_plugin/README.md index ca7b3a2b6..636eb7f94 100644 --- a/examples/databricks_plugin/README.md +++ b/examples/databricks_plugin/README.md @@ -1,5 +1,14 @@ -# Databricks plugin example +# Databricks plugin -**Note:** This example code uses a legacy implementation of the Databricks integration. 
We recommend using the [Databricks agent](https://docs.flyte.org/en/latest/flytesnacks/examples/databricks_agent/index.html) instead. +```{warning} +This example code uses a legacy implementation of the Databricks integration. We recommend using the [Databricks agent](https://docs.flyte.org/en/latest/flytesnacks/examples/databricks_agent/index.html) instead. +``` This directory contains example code for the deprecated Databricks plugin. For documentation on installing and using the plugin, see the [Databricks plugin documentation](https://docs.flyte.org/en/latest/deprecated_integrations/databricks_plugin/index.html) + +```{toctree} +:maxdepth: -1 +:hidden: + +databricks_plugin_example +``` diff --git a/examples/databricks_plugin/databricks_plugin/databricks_plugin_example.py b/examples/databricks_plugin/databricks_plugin/databricks_plugin_example.py index 3137ca1b5..72f98c5ad 100644 --- a/examples/databricks_plugin/databricks_plugin/databricks_plugin_example.py +++ b/examples/databricks_plugin/databricks_plugin/databricks_plugin_example.py @@ -1,3 +1,9 @@ +# %% [markdown] +# (spark_on_databricks_plugin)= +# # Databricks plugin example +# +# %% + import datetime import random from operator import add diff --git a/examples/development_lifecycle/development_lifecycle/decks.py b/examples/development_lifecycle/development_lifecycle/decks.py index d849bd339..50cf4daf2 100644 --- a/examples/development_lifecycle/development_lifecycle/decks.py +++ b/examples/development_lifecycle/development_lifecycle/decks.py @@ -13,7 +13,17 @@ # https://docs.flyte.org/en/latest/user_guide/customizing_dependencies/imagespec.html#image-spec-example custom_image = ImageSpec( - packages=["plotly", "scikit-learn", "flytekitplugins-deck-standard"], registry="ghcr.io/flyteorg" + packages=[ + "flytekitplugins-deck-standard", + "markdown", + "pandas", + "pillow", + "plotly", + "pyarrow", + "scikit-learn", + "ydata_profiling", + ], + registry="ghcr.io/flyteorg", ) if custom_image.is_container(): @@ -47,7 +57,7 @@ def pca_plot(): from flytekitplugins.deck.renderer import FrameProfilingRenderer -@task(enable_deck=True) +@task(enable_deck=True, container_image=custom_image) def frame_renderer() -> None: df = pd.DataFrame(data={"col1": [1, 2], "col2": [3, 4]}) flytekit.Deck("Frame Renderer", FrameProfilingRenderer().to_html(df=df)) @@ -61,7 +71,7 @@ def frame_renderer() -> None: from flytekit.deck import TopFrameRenderer -@task(enable_deck=True) +@task(enable_deck=True, container_image=custom_image) def top_frame_renderer() -> Annotated[pd.DataFrame, TopFrameRenderer(1)]: return pd.DataFrame(data={"col1": [1, 2], "col2": [3, 4]}) @@ -70,7 +80,7 @@ def top_frame_renderer() -> Annotated[pd.DataFrame, TopFrameRenderer(1)]: # producing HTML as a Unicode string. 
-@task(enable_deck=True) +@task(enable_deck=True, container_image=custom_image) def markdown_renderer() -> None: flytekit.current_context().default_deck.append( MarkdownRenderer().to_html("You can install flytekit using this command: ```import flytekit```") @@ -87,7 +97,7 @@ def markdown_renderer() -> None: from flytekitplugins.deck.renderer import BoxRenderer -@task(enable_deck=True) +@task(enable_deck=True, container_image=custom_image) def box_renderer() -> None: iris_df = px.data.iris() flytekit.Deck("Box Plot", BoxRenderer("sepal_length").to_html(iris_df)) @@ -101,7 +111,7 @@ def box_renderer() -> None: from flytekitplugins.deck.renderer import ImageRenderer -@task(enable_deck=True) +@task(enable_deck=True, container_image=custom_image) def image_renderer(image: FlyteFile) -> None: flytekit.Deck("Image Renderer", ImageRenderer().to_html(image_src=image)) @@ -117,7 +127,7 @@ def image_renderer_wf( from flytekitplugins.deck.renderer import TableRenderer -@task(enable_deck=True) +@task(enable_deck=True, container_image=custom_image) def table_renderer() -> None: flytekit.Deck( "Table Renderer", diff --git a/examples/development_lifecycle/requirements.in b/examples/development_lifecycle/requirements.in index 8e50db9c6..89af2129e 100644 --- a/examples/development_lifecycle/requirements.in +++ b/examples/development_lifecycle/requirements.in @@ -3,4 +3,5 @@ flytekitplugins-deck-standard plotly scikit-learn tabulate +pandas pyarrow diff --git a/examples/house_price_prediction/README.md b/examples/house_price_prediction/README.md index 49f5c55c5..70341e6c2 100644 --- a/examples/house_price_prediction/README.md +++ b/examples/house_price_prediction/README.md @@ -12,7 +12,7 @@ In this example, we will train our data on the XGBoost model to predict house pr ## Where Does Flyte Fit In? - Orchestrates the machine learning pipeline. -- Helps cache the output state between {py:func}`tasks `. +- Helps cache the output state between {py:func}`tasks `. - Easier backtracking to the error source. - Provides a Rich UI to view and manage the pipeline. diff --git a/examples/house_price_prediction/requirements.in b/examples/house_price_prediction/requirements.in index b571e33d3..f09c8717b 100644 --- a/examples/house_price_prediction/requirements.in +++ b/examples/house_price_prediction/requirements.in @@ -7,3 +7,4 @@ joblib scikit-learn tabulate matplotlib +pandas diff --git a/examples/k8s_dask_plugin/README.md b/examples/k8s_dask_plugin/README.md index 713e810c2..c5f6a02ed 100644 --- a/examples/k8s_dask_plugin/README.md +++ b/examples/k8s_dask_plugin/README.md @@ -71,7 +71,7 @@ Flyte Dask utilizes the [Dask Kubernetes operator](https://kubernetes.dask.org/e in conjunction with a custom-built [Flyte Dask plugin](https://pkg.go.dev/github.com/flyteorg/flyteplugins@v1.0.28/go/tasks/plugins/k8s/dask). To leverage this functionality, you need to enable the backend plugin in your deployment. -You can follow the steps mentioned in the {ref}`flyte:deployment-plugin-setup-k8s` section +You can follow the steps mentioned in the {ref}`deployment-plugin-setup-k8s` section to enable the Flyte Dask plugin for your deployment. #### Step 2: Compute setup diff --git a/examples/k8s_pod_plugin/README.md b/examples/k8s_pod_plugin/README.md index e7b0e8dc8..2f18928dd 100644 --- a/examples/k8s_pod_plugin/README.md +++ b/examples/k8s_pod_plugin/README.md @@ -4,7 +4,11 @@ .. 
tags:: Integration, Kubernetes, Advanced
 ```
 
-Flyte tasks, represented by the {py:func}`@task ` decorator, are essentially single functions that run in one container.
+```{important}
+This plugin is no longer needed and is here only for backwards compatibility. No new versions will be published after v1.13.x. Please use the `pod_template` and `pod_template_name` arguments to `@task` as described in the {ref}`Kubernetes task pod configuration guide ` instead.
+```
+
+Flyte tasks, represented by the {py:func}`@task ` decorator, are essentially single functions that run in one container.
 However, there may be situations where you need to run a job with more than one container or require additional capabilities, such as:
 
 - Running a hyper-parameter optimizer that stores state in a Redis database
diff --git a/examples/k8s_spark_plugin/README.md b/examples/k8s_spark_plugin/README.md
index 3bdbf721c..ad7a39cbe 100644
--- a/examples/k8s_spark_plugin/README.md
+++ b/examples/k8s_spark_plugin/README.md
@@ -27,7 +27,7 @@ However, please bear in mind that this functionality requires a significant Kube
 For optimal results, we highly recommend adopting the [multi-cluster mode](https://docs.flyte.org/en/latest/deployment/configuration/performance.html#multi-cluster-mode).
 
-Additionally, consider enabling {std:ref}`resource quotas `
+Additionally, consider enabling {ref}`resource quotas `
 for Spark Jobs that are both large in scale and executed frequently.
 
 Nonetheless, it is important to note that extremely short-duration jobs might not be the best fit for this setup.
@@ -63,7 +63,7 @@
 Flyte Spark employs the Spark on K8s operator in conjunction with a bespoke
 [Flyte Spark Plugin](https://pkg.go.dev/github.com/flyteorg/flyteplugins@v0.5.25/go/tasks/plugins/k8s/spark).
 This plugin serves as a backend component and necessitates activation within your deployment.
-To enable it, follow the instructions outlined in the {ref}`flyte:deployment-plugin-setup-k8s` section.
+To enable it, follow the instructions outlined in the {ref}`deployment-plugin-setup-k8s` section.
 
 :::{note}
 Refer to [this guide](https://github.com/GoogleCloudPlatform/spark-on-k8s-operator/blob/master/docs/gcp.md) to use GCP instead of AWS.
diff --git a/examples/kfmpi_plugin/README.md b/examples/kfmpi_plugin/README.md
index ad23c4729..1eeeac68b 100644
--- a/examples/kfmpi_plugin/README.md
+++ b/examples/kfmpi_plugin/README.md
@@ -55,3 +55,36 @@ pyflyte run --remote \
 ```{auto-examples-toc}
 mpi_mnist
 ```
+
+## MPI Plugin Troubleshooting Guide
+
+This section covers common issues encountered during the setup of the MPI operator for distributed training jobs on Flyte.
+
+**Worker Pods Failing to Start (Insufficient Resources)**
+
+MPI worker pods may fail to start or exhibit scheduling issues, leading to job timeouts or failures. This often occurs due to resource constraints (CPU, memory, or GPU) in the cluster.
+
+1. Adjust Resource Requests:
+Ensure that each worker pod has sufficient resources. You can adjust the resource requests in your task definition:
+
+```
+    requests=Resources(cpu="<num_cpus>", mem="<memory>")
+```
+
+Modify the CPU and memory values according to your cluster's available resources. This helps prevent pod scheduling failures caused by resource constraints; a fuller task-level sketch follows at the end of this subsection.
+
+2. Check Pod Logs for Errors:
+If the worker pods still fail to start, check the logs for any related errors:
+
+```
+    kubectl logs <pod-name> -n <namespace>
+```
+
+Look for resource allocation or worker communication errors.
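+
+For reference, here is a minimal task-level sketch showing where these requests live. The worker/launcher topology and resource values are illustrative, and depending on your plugin version the topology may instead be configured with `Launcher`/`Worker` objects:
+
+```python
+from flytekit import Resources, task
+from flytekitplugins.kfmpi import MPIJob
+
+
+@task(
+    task_config=MPIJob(num_workers=2, num_launcher_replicas=1, slots=1),
+    requests=Resources(cpu="2", mem="4Gi"),
+    limits=Resources(cpu="4", mem="8Gi"),
+)
+def horovod_train() -> None:
+    # Distributed training code (e.g., Horovod) goes here
+    ...
+```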
+ +**Workflow Registration Method Errors (Timeouts or Deadlocks)** + +If your MPI workflow hangs or times out, it may be caused by an incorrect workflow registration method. + +1. Verify Registration Method: + When using a custom image, refer to the Flyte documentation on [Registering workflows](https://docs.flyte.org/en/latest/user_guide/flyte_fundamentals/registering_workflows.html#registration-patterns) to ensure you're following the correct registration method. diff --git a/examples/kfpytorch_plugin/README.md b/examples/kfpytorch_plugin/README.md index 95c67245e..e4f631780 100644 --- a/examples/kfpytorch_plugin/README.md +++ b/examples/kfpytorch_plugin/README.md @@ -17,7 +17,7 @@ To use the PyTorch plugin, run the following command: pip install flytekitplugins-kfpytorch ``` -To enable the plugin in the backend, follow instructions outlined in the {std:ref}`flyte:deployment-plugin-setup-k8s` guide. +To enable the plugin in the backend, follow instructions outlined in the {ref}`deployment-plugin-setup-k8s` guide. ## Run the example on the Flyte cluster diff --git a/examples/kftensorflow_plugin/README.md b/examples/kftensorflow_plugin/README.md index 636d0514d..d97b4b477 100644 --- a/examples/kftensorflow_plugin/README.md +++ b/examples/kftensorflow_plugin/README.md @@ -17,7 +17,7 @@ To install the Kubeflow TensorFlow plugin, run the following command: pip install flytekitplugins-kftensorflow ``` -To enable the plugin in the backend, follow instructions outlined in the {std:ref}`flyte:deployment-plugin-setup-k8s` guide. +To enable the plugin in the backend, follow instructions outlined in the {ref}`deployment-plugin-setup-k8s` guide. ## Run the example on the Flyte cluster diff --git a/examples/mmcloud_agent/mmcloud_agent/mmcloud_agent_example_usage.py b/examples/mmcloud_agent/mmcloud_agent/mmcloud_agent_example_usage.py index d045bbb80..eac12209d 100644 --- a/examples/mmcloud_agent/mmcloud_agent/mmcloud_agent_example_usage.py +++ b/examples/mmcloud_agent/mmcloud_agent/mmcloud_agent_example_usage.py @@ -24,7 +24,7 @@ def to_int(s: str) -> int: # %% [markdown] -# [Resource](https://docs.flyte.org/projects/cookbook/en/latest/auto_examples/productionizing/customizing_resources.html) (cpu and mem) requests and limits, [container](https://docs.flyte.org/projects/cookbook/en/latest/auto_examples/customizing_dependencies/multi_images.html) images, and [environment](https://docs.flyte.org/projects/flytekit/en/latest/generated/flytekit.task.html) variable specifications are supported. +# [Resource](https://docs.flyte.org/en/latest/user_guide/productionizing/customizing_task_resources.html) (cpu and mem) requests and limits, [container](https://docs.flyte.org/en/latest/user_guide/customizing_dependencies/index.html#customizing-dependencies) images, and [environment](https://docs.flyte.org/projects/flytekit/en/latest/generated/flytekit.task.html) variable specifications are supported. 
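+#
+# For instance, a minimal sketch (the config values, resource numbers, and
+# environment variables are illustrative):
+#
+# ```python
+# from flytekit import Resources, task
+# from flytekitplugins.mmcloud import MMCloudConfig
+#
+#
+# @task(
+#     task_config=MMCloudConfig(),
+#     requests=Resources(cpu="1", mem="1Gi"),
+#     limits=Resources(cpu="2", mem="4Gi"),
+#     environment={"KEY": "value"},
+# )
+# def shout(s: str) -> str:
+#     return s.upper()
+# ```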
# %% diff --git a/examples/neptune_plugin/Dockerfile b/examples/neptune_plugin/Dockerfile new file mode 100644 index 000000000..80482086b --- /dev/null +++ b/examples/neptune_plugin/Dockerfile @@ -0,0 +1,24 @@ +FROM python:3.11-slim-bookworm +LABEL org.opencontainers.image.source https://github.com/flyteorg/flytesnacks + +WORKDIR /root +ENV VENV /opt/venv +ENV LANG C.UTF-8 +ENV LC_ALL C.UTF-8 +ENV PYTHONPATH /root + +# Virtual environment +RUN python3 -m venv ${VENV} +ENV PATH="${VENV}/bin:$PATH" + +# Install Python dependencies +COPY requirements.in /root +RUN pip install --no-cache-dir -r /root/requirements.in + +# Copy the actual code +COPY . /root + +# This tag is supplied by the build script and will be used to determine the version +# when registering tasks, workflows, and launch plans +ARG tag +ENV FLYTE_INTERNAL_IMAGE $tag diff --git a/examples/neptune_plugin/README.md b/examples/neptune_plugin/README.md new file mode 100644 index 000000000..67c4d845d --- /dev/null +++ b/examples/neptune_plugin/README.md @@ -0,0 +1,47 @@ +(neptune_plugin)= + +# Neptune plugin + +```{eval-rst} +.. tags:: Integration, Data, Metrics, Intermediate +``` + +[Neptune](https://neptune.ai/) is an experiment tracker for large-scale model training. It allows AI researchers to monitor their model training in real time, visualize and compare experiments, and collaborate on them with a team. This plugin enables seamless use of Neptune within Flyte by configuring links between the two platforms. You can find more information about how to use Neptune in their [documentation](https://docs.neptune.ai/). + +## Installation + +To install the Flyte Neptune plugin, run the following command: + +```bash +pip install flytekitplugins-neptune +``` + +## Example usage + +For a usage example, see the {doc}`Neptune example `. + +## Local testing + +To run {doc}`Neptune example ` locally: + +1. Create an account on [Neptune](https://neptune.ai/). +2. Create a project on Neptune. +3. In the example, set `NEPTUNE_PROJECT` to your project name. +4. Add a secret using [Flyte's Secrets manager](https://docs.flyte.org/en/latest/user_guide/productionizing/secrets.html) with `key="neptune-api-token"` and `group="neptune-api-group"` +5. If you want to see the dynamic log links in the UI, then add the configuration in the next section. + +## Flyte deployment configuration + +To enable dynamic log links, add the plugin to Flyte's configuration file: +```yaml +plugins: + logs: + dynamic-log-links: + - neptune-run-id: + displayName: Neptune + templateUris: "{{ .taskConfig.host }}/{{ .taskConfig.project }}?query=(%60flyte%2Fexecution_id%60%3Astring%20%3D%20%22{{ .executionName }}-{{ .nodeId }}-{{ .taskRetryAttempt }}%22)&lbViewUnpacked=true" +``` + +```{auto-examples-toc} +neptune_example +``` diff --git a/examples/neptune_plugin/neptune_plugin/__init__.py b/examples/neptune_plugin/neptune_plugin/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/examples/neptune_plugin/neptune_plugin/neptune_example.py b/examples/neptune_plugin/neptune_plugin/neptune_example.py new file mode 100644 index 000000000..b6a7949b8 --- /dev/null +++ b/examples/neptune_plugin/neptune_plugin/neptune_example.py @@ -0,0 +1,168 @@ +# %% [markdown] +# (neptune_example)= +# +# # Neptune example +# Neptune is the MLOps stack component for experiment tracking. It offers a single place +# to log, compare, store, and collaborate on experiments and models. 
+# enables seamless use of Neptune within Flyte by configuring links between the
+# two platforms. In this example, we learn how to scale up training of multiple
+# XGBoost models and use Neptune for tracking.
+# %%
+from typing import List, Tuple
+
+import numpy as np
+from flytekit import (
+    ImageSpec,
+    Resources,
+    Secret,
+    current_context,
+    dynamic,
+    task,
+    workflow,
+)
+from flytekitplugins.neptune import neptune_init_run
+
+# %% [markdown]
+# First, we specify the Neptune project that was created on Neptune's platform.
+# Please update `NEPTUNE_PROJECT` to the value associated with your account.

+# %%
+NEPTUNE_PROJECT = "username/project"
+
+# %% [markdown]
+# Neptune requires an API key to authenticate with their service. In the example below,
+# the secret is created using
+# [Flyte's Secrets manager](https://docs.flyte.org/en/latest/user_guide/productionizing/secrets.html).

+# %%
+api_key = Secret(key="neptune-api-token", group="neptune-api-group")

+# %% [markdown]
+# Next, we use `ImageSpec` to construct a container with the dependencies for our
+# XGBoost training task. Please set the `REGISTRY` to a registry that your cluster can access.

+# %%
+REGISTRY = "localhost:30000"
+
+image = ImageSpec(
+    name="flytekit-xgboost",
+    packages=[
+        "neptune",
+        "neptune-xgboost",
+        "flytekitplugins-neptune",
+        "scikit-learn==1.5.1",
+        "numpy==1.26.1",
+        "matplotlib==3.9.2",
+    ],
+    builder="default",
+    registry=REGISTRY,
+)
+
+
+# %% [markdown]
+# Then, we use a task to download the dataset and cache the data in Flyte:
+
+
+# %%
+@task(
+    container_image=image,
+    cache=True,
+    cache_version="v2",
+    requests=Resources(cpu="2", mem="2Gi"),
+)
+def get_dataset() -> Tuple[np.ndarray, np.ndarray]:
+    from sklearn.datasets import fetch_california_housing
+
+    X, y = fetch_california_housing(return_X_y=True, as_frame=False)
+    return X, y
+
+
+# %% [markdown]
+# Next, we use the `neptune_init_run` decorator to configure Flyte to train an XGBoost
+# model. The decorator requires an `api_key` secret to authenticate with Neptune and
+# the task definition needs to request the same `api_key` secret. In the training
+# function, the [Neptune run object](https://docs.neptune.ai/api/run/) is accessible
+# through `current_context().neptune_run`, which is frequently used
+# in Neptune's integrations. In this example, we pass the `Run` object into Neptune's
+# XGBoost callback.
+
+
+# %%
+@task(
+    container_image=image,
+    secret_requests=[api_key],
+    requests=Resources(cpu="2", mem="4Gi"),
+)
+@neptune_init_run(project=NEPTUNE_PROJECT, secret=api_key)
+def train_model(max_depth: int, X: np.ndarray, y: np.ndarray):
+    import xgboost as xgb
+    from neptune.integrations.xgboost import NeptuneCallback
+    from sklearn.model_selection import train_test_split
+
+    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=123)
+    dtrain = xgb.DMatrix(X_train, label=y_train)
+    dval = xgb.DMatrix(X_test, label=y_test)
+
+    ctx = current_context()
+    run = ctx.neptune_run
+    neptune_callback = NeptuneCallback(run=run)
+
+    model_params = {
+        "tree_method": "hist",
+        "eta": 0.7,
+        "gamma": 0.001,
+        "max_depth": max_depth,
+        "objective": "reg:squarederror",
+        "eval_metric": ["mae", "rmse"],
+    }
+    evals = [(dtrain, "train"), (dval, "valid")]
+
+    # Train the model and log metadata to the run in Neptune
+    xgb.train(
+        params=model_params,
+        dtrain=dtrain,
+        num_boost_round=57,
+        evals=evals,
+        callbacks=[
+            neptune_callback,
+            xgb.callback.LearningRateScheduler(lambda epoch: 0.99**epoch),
+            xgb.callback.EarlyStopping(rounds=30),
+        ],
+    )
+
+
+# %% [markdown]
+# With Flyte's dynamic workflows, we can scale up multiple training jobs with different
+# `max_depths`:
+
+
+# %%
+@dynamic(container_image=image)
+def train_multiple_models(max_depths: List[int], X: np.ndarray, y: np.ndarray):
+    for max_depth in max_depths:
+        train_model(max_depth=max_depth, X=X, y=y)
+
+
+@workflow
+def train_wf(max_depths: List[int] = [2, 4, 10]):
+    X, y = get_dataset()
+    train_multiple_models(max_depths=max_depths, X=X, y=y)
+
+
+# %% [markdown]
+# To run this workflow on a remote Flyte cluster, run:
+# ```bash
+# pyflyte run --remote neptune_example.py train_wf
+# ```
+
+
+# %% [markdown]
+# To enable dynamic log links, add the plugin to Flyte's configuration file:
+# ```yaml
+# plugins:
+#   logs:
+#     dynamic-log-links:
+#       - neptune-run-id:
+#           displayName: Neptune
+#           templateUris: "{{ .taskConfig.host }}/{{ .taskConfig.project }}?query=(%60flyte%2Fexecution_id%60%3Astring%20%3D%20%22{{ .executionName }}-{{ .nodeId }}-{{ .taskRetryAttempt }}%22)&lbViewUnpacked=true"
+# ```
diff --git a/examples/neptune_plugin/requirements.in b/examples/neptune_plugin/requirements.in
new file mode 100644
index 000000000..878f4ef4e
--- /dev/null
+++ b/examples/neptune_plugin/requirements.in
@@ -0,0 +1,7 @@
+flytekitplugins-neptune
+xgboost
+neptune
+neptune-xgboost
+scikit-learn==1.5.1
+numpy==1.26.1
+matplotlib==3.9.2
diff --git a/examples/nim_plugin/README.md b/examples/nim_plugin/README.md
index 506c9eab9..36011695b 100644
--- a/examples/nim_plugin/README.md
+++ b/examples/nim_plugin/README.md
@@ -29,7 +29,7 @@ pip install flytekitplugins-inference
For a usage example, see {doc}`NIM example usage `.

```{note}
-NIM can only be run in a Flyte cluster, not locally, as it must be deployed as a sidecar service in a Kubernetes pod.
+NIM can only be run in a Flyte cluster as it must be deployed as a sidecar service in a Kubernetes pod.
```

```{toctree}
diff --git a/examples/ollama_plugin/Dockerfile b/examples/ollama_plugin/Dockerfile
new file mode 100644
index 000000000..0c46be23a
--- /dev/null
+++ b/examples/ollama_plugin/Dockerfile
@@ -0,0 +1,23 @@
+########################
+# NOTE: For CI/CD only #
+########################
+FROM python:3.11-slim-buster
+LABEL org.opencontainers.image.source=https://github.com/flyteorg/flytesnacks
+
+WORKDIR /root
+ENV VENV /opt/venv
+ENV LANG C.UTF-8
+ENV LC_ALL C.UTF-8
+ENV PYTHONPATH /root
+
+# Install Python dependencies
+COPY requirements.in /root
+RUN pip install -r /root/requirements.in
+
+# Copy the actual code
+COPY . /root/
+
+# This tag is supplied by the build script and will be used to determine the version
+# when registering tasks, workflows, and launch plans
+ARG tag
+ENV FLYTE_INTERNAL_IMAGE $tag
diff --git a/examples/ollama_plugin/README.md b/examples/ollama_plugin/README.md
new file mode 100644
index 000000000..75b97f0fb
--- /dev/null
+++ b/examples/ollama_plugin/README.md
@@ -0,0 +1,36 @@
+(ollama_plugin)=
+
+# Ollama
+
+```{eval-rst}
+.. tags:: Inference, LLM
+```
+
+Serve large language models (LLMs) in a Flyte task.
+
+[Ollama](https://ollama.com/) simplifies the process of serving fine-tuned LLMs.
+Whether you're generating predictions from a customized model or deploying it across different hardware setups,
+Ollama enables you to encapsulate the entire workflow in a single pipeline.
+
+## Installation
+
+To use the Ollama plugin, run the following command:
+
+```
+pip install flytekitplugins-inference
+```
+
+## Example usage
+
+For a usage example, see {doc}`Ollama example usage <serve_llm>`.
+
+```{note}
+Ollama can only be run in a Flyte cluster as it must be deployed as a sidecar service in a Kubernetes pod.
+```
+
+```{toctree}
+:maxdepth: -1
+:hidden:
+
+serve_llm
+```
diff --git a/examples/ollama_plugin/ollama_plugin/__init__.py b/examples/ollama_plugin/ollama_plugin/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/examples/ollama_plugin/ollama_plugin/serve_llm.py b/examples/ollama_plugin/ollama_plugin/serve_llm.py
new file mode 100644
index 000000000..f96ef7252
--- /dev/null
+++ b/examples/ollama_plugin/ollama_plugin/serve_llm.py
@@ -0,0 +1,99 @@
+# %% [markdown]
+# (serve_llm)=
+#
+# # Serve LLMs with Ollama
+#
+# In this guide, you'll learn how to locally serve Gemma2 and fine-tuned Llama3 models using Ollama within a Flyte task.
+#
+# Start by importing Ollama from the `flytekitplugins.inference` package and specifying the desired model name.
+# +# Below is a straightforward example of serving a Gemma2 model: +# %% +from flytekit import ImageSpec, Resources, task +from flytekit.extras.accelerators import A10G +from flytekitplugins.inference import Model, Ollama +from openai import OpenAI + +image = ImageSpec( + name="ollama_serve", + registry="ghcr.io/flyteorg", + packages=["flytekitplugins-inference"], + builder="default", +) + +ollama_instance = Ollama(model=Model(name="gemma2"), gpu="1") + + +@task( + container_image=image, + pod_template=ollama_instance.pod_template, + accelerator=A10G, + requests=Resources(gpu="0"), +) +def model_serving(user_prompt: str) -> str: + client = OpenAI(base_url=f"{ollama_instance.base_url}/v1", api_key="ollama") # api key required but ignored + + completion = client.chat.completions.create( + model="gemma2", + messages=[ + { + "role": "user", + "content": user_prompt, + } + ], + temperature=0.5, + top_p=1, + max_tokens=1024, + ) + + return completion.choices[0].message.content + + +# %% [markdown] +# :::{important} +# Replace `ghcr.io/flyteorg` with a container registry to which you can publish. +# To upload the image to the local registry in the demo cluster, indicate the registry as `localhost:30000`. +# ::: +# +# The `model_serving` task initiates a sidecar service to serve the model, making it accessible on localhost via the `base_url` property. +# You can use either the chat or chat completion endpoints. +# +# By default, Ollama initializes the server with `cpu`, `gpu`, and `mem` set to `1`, `1`, and `15Gi`, respectively. +# You can adjust these settings to meet your requirements. +# +# To serve a fine-tuned model, provide the model configuration as `modelfile` within the `Model` dataclass. +# +# Below is an example of specifying a fine-tuned LoRA adapter for a Llama3 Mario model: +# %% +from flytekit.types.file import FlyteFile + +finetuned_ollama_instance = Ollama( + model=Model( + name="llama3-mario", + modelfile="FROM llama3\nADAPTER {inputs.ggml}\nPARAMETER temperature 1\nPARAMETER num_ctx 4096\nSYSTEM {inputs.system_prompt}", + ), + gpu="1", +) + + +@task( + container_image=image, + pod_template=finetuned_ollama_instance.pod_template, + accelerator=A10G, + requests=Resources(gpu="0"), +) +def finetuned_model_serving(ggml: FlyteFile, system_prompt: str): + ... + + +# %% [markdown] +# `{inputs.ggml}` and `{inputs.system_prompt}` are materialized at run time, with `ggml` and `system_prompt` available as inputs to the task. +# +# Ollama models can be integrated into different stages of your AI workflow, including data pre-processing, +# model inference, and post-processing. Flyte also allows serving multiple Ollama models simultaneously +# on various instances. +# +# This integration enables you to self-host and serve AI models on your own infrastructure, +# ensuring full control over costs and data security. +# +# For more detailed information on the models natively supported by Ollama, visit the [Ollama models library](https://ollama.com/library). 
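+#
+# As a sketch of the resource overrides mentioned earlier (the `cpu`, `gpu`, and
+# `mem` keyword names are assumed from the defaults described above, so treat this
+# as illustrative rather than a definitive API reference):
+#
+# ```python
+# # Size the Ollama server up from its defaults (cpu=1, gpu=1, mem=15Gi)
+# bigger_ollama_instance = Ollama(model=Model(name="gemma2"), cpu="2", gpu="1", mem="20Gi")
+# ```
+#
+# The resulting `bigger_ollama_instance.pod_template` can then be passed to a task
+# exactly as in the `model_serving` example above.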
diff --git a/examples/ollama_plugin/requirements.in b/examples/ollama_plugin/requirements.in
new file mode 100644
index 000000000..a4a684ce6
--- /dev/null
+++ b/examples/ollama_plugin/requirements.in
@@ -0,0 +1 @@
+flytekitplugins-inference>=1.13.6b1
diff --git a/examples/pandera_plugin/pandera_plugin/basic_schema_example.py b/examples/pandera_plugin/pandera_plugin/basic_schema_example.py
index 8c8d36b2a..adc164f4d 100644
--- a/examples/pandera_plugin/pandera_plugin/basic_schema_example.py
+++ b/examples/pandera_plugin/pandera_plugin/basic_schema_example.py
@@ -15,7 +15,7 @@
from flytekit import ImageSpec, task, workflow
from pandera.typing import DataFrame, Series

-custom_image = ImageSpec(registry="ghcr.io/flyteorg", packages=["flytekitplugins-pandera", "scikit-learn"])
+custom_image = ImageSpec(registry="ghcr.io/flyteorg", packages=["flytekitplugins-pandera", "scikit-learn", "pyarrow"])

# %% [markdown]
# ## A Simple Data Processing Pipeline
diff --git a/examples/pandera_plugin/pandera_plugin/validating_and_testing_ml_pipelines.py b/examples/pandera_plugin/pandera_plugin/validating_and_testing_ml_pipelines.py
index 85fdef648..2cb649312 100644
--- a/examples/pandera_plugin/pandera_plugin/validating_and_testing_ml_pipelines.py
+++ b/examples/pandera_plugin/pandera_plugin/validating_and_testing_ml_pipelines.py
@@ -52,7 +52,7 @@
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

-custom_image = ImageSpec(registry="ghcr.io/flyteorg", packages=["flytekitplugins-pandera", "scikit-learn"])
+custom_image = ImageSpec(registry="ghcr.io/flyteorg", packages=["flytekitplugins-pandera", "scikit-learn", "pyarrow"])

# %% [markdown]
# We also need to import the `pandera` flytekit plugin to enable dataframe runtime type-checking:
diff --git a/examples/papermill_plugin/papermill_plugin/simple.py b/examples/papermill_plugin/papermill_plugin/simple.py
index 9d4536561..90b642b9b 100644
--- a/examples/papermill_plugin/papermill_plugin/simple.py
+++ b/examples/papermill_plugin/papermill_plugin/simple.py
@@ -32,6 +32,7 @@
    name="simple-nb",
    notebook_path=str(pathlib.Path(__file__).parent.absolute() / "nb_simple.ipynb"),
    render_deck=True,
+   enable_deck=True,
    inputs=kwtypes(v=float),
    outputs=kwtypes(square=float),
)
diff --git a/examples/perian_agent/Dockerfile b/examples/perian_agent/Dockerfile
new file mode 100644
index 000000000..d56c81ef4
--- /dev/null
+++ b/examples/perian_agent/Dockerfile
@@ -0,0 +1,28 @@
+FROM python:3.8-slim-buster
+LABEL org.opencontainers.image.source=https://github.com/flyteorg/flytesnacks
+
+WORKDIR /root
+ENV VENV /opt/venv
+ENV LANG C.UTF-8
+ENV LC_ALL C.UTF-8
+ENV PYTHONPATH /root
+
+# This is necessary for opencv to work
+RUN apt-get update && apt-get install -y libsm6 libxext6 libxrender-dev ffmpeg build-essential curl
+
+# Virtual environment
+RUN python3 -m venv ${VENV}
+ENV PATH="${VENV}/bin:$PATH"
+
+# Install Python dependencies
+COPY requirements.in /root
+RUN pip install -r /root/requirements.in
+RUN pip freeze
+
+# Copy the actual code
+COPY . /root
+
+# This tag is supplied by the build script and will be used to determine the version
+# when registering tasks, workflows, and launch plans
+ARG tag
+ENV FLYTE_INTERNAL_IMAGE $tag
diff --git a/examples/perian_agent/README.md b/examples/perian_agent/README.md
new file mode 100644
index 000000000..a07769d9d
--- /dev/null
+++ b/examples/perian_agent/README.md
@@ -0,0 +1,21 @@
+```{eval-rst}
+.. tags:: Cloud, GPU, Integration, Advanced
+```
+
+(perian_agent)=
+
+# PERIAN Job Platform Agent
+
+The PERIAN Flyte Agent enables you to execute Flyte tasks on the [PERIAN Sky Platform](https://perian.io/). PERIAN allows the execution of any task on servers aggregated from multiple cloud providers.
+
+Example usage:
+
+```{auto-examples-toc}
+example
+```
+
+To get started with PERIAN, see the [PERIAN documentation](https://perian.io/docs/overview) and the [PERIAN Flyte Agent documentation](https://perian.io/docs/flyte-getting-started).
+
+## Agent setup
+
+Consult the [PERIAN Flyte Agent setup guide](https://perian.io/docs/flyte-setup-guide).
diff --git a/examples/perian_agent/perian_agent/__init__.py b/examples/perian_agent/perian_agent/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/examples/perian_agent/perian_agent/example.py b/examples/perian_agent/perian_agent/example.py
new file mode 100644
index 000000000..a23060be2
--- /dev/null
+++ b/examples/perian_agent/perian_agent/example.py
@@ -0,0 +1,40 @@
+# %% [markdown]
+# (example)=
+# # PERIAN agent example usage
+#
+# This example shows how to use the PERIAN agent to execute tasks on the PERIAN Job Platform.

+# %%
+from flytekit import ImageSpec, task, workflow
+from flytekitplugins.perian_job import PerianConfig
+
+image_spec = ImageSpec(
+    name="flyte-test",
+    registry="my-registry",
+    python_version="3.11",
+    apt_packages=["wget", "curl", "git"],
+    packages=[
+        "flytekitplugins-perian-job",
+    ],
+)
+
+
+# %% [markdown]
+# `PerianConfig` configures `PerianTask`. Tasks specified with `PerianConfig` will be executed on the PERIAN Job Platform.
+
+
+# %%
+@task(
+    container_image=image_spec,
+    task_config=PerianConfig(
+        accelerators=1,
+        accelerator_type="A100",
+    ),
+)
+def perian_hello(name: str) -> str:
+    return f"hello {name}!"
+
+
+@workflow
+def my_wf(name: str = "world") -> str:
+    return perian_hello(name=name)
diff --git a/examples/perian_agent/requirements.in b/examples/perian_agent/requirements.in
new file mode 100644
index 000000000..b0b2423b0
--- /dev/null
+++ b/examples/perian_agent/requirements.in
@@ -0,0 +1,5 @@
+flytekit>=1.7.0
+wheel
+matplotlib
+flytekitplugins-deck-standard
+flytekitplugins-perian-job
diff --git a/examples/pima_diabetes/requirements.in b/examples/pima_diabetes/requirements.in
index b571e33d3..f09c8717b 100644
--- a/examples/pima_diabetes/requirements.in
+++ b/examples/pima_diabetes/requirements.in
@@ -7,3 +7,4 @@ joblib
scikit-learn
tabulate
matplotlib
+pandas
diff --git a/examples/ray_plugin/README.md b/examples/ray_plugin/README.md
index 3ad2a65a8..575c97b50 100644
--- a/examples/ray_plugin/README.md
+++ b/examples/ray_plugin/README.md
@@ -25,7 +25,7 @@ To install the Ray plugin, run the following command:
pip install flytekitplugins-ray
```

-To enable the plugin in the backend, refer to the instructions provided in the {std:ref}`flyte:deployment-plugin-setup-k8s` guide.
+To enable the plugin in the backend, refer to the instructions provided in the {ref}`deployment-plugin-setup-k8s` guide.

## Implementation details
diff --git a/examples/snowflake_plugin/README.md b/examples/snowflake_plugin/README.md
index 6921e44ae..43a161c5c 100644
--- a/examples/snowflake_plugin/README.md
+++ b/examples/snowflake_plugin/README.md
@@ -1,3 +1,14 @@
-**Note:** This example code uses a legacy implementation of the Snowflake integration. We recommend using the [Snowflake agent](https://docs.flyte.org/en/latest/flytesnacks/examples/databricks_agent/index.html) instead.
+# Snowflake plugin
+
+```{warning}
+This example code uses a legacy implementation of the Snowflake integration. We recommend using the [Snowflake agent](https://docs.flyte.org/en/latest/flytesnacks/examples/snowflake_agent/index.html) instead.
+```

This directory contains example code for the deprecated Snowflake plugin. For documentation on installing and using the plugin, see the [Snowflake plugin documentation](https://docs.flyte.org/en/latest/deprecated_integrations/snowflake_plugin/index.html).
+
+```{toctree}
+:maxdepth: -1
+:hidden:
+
+snowflake_plugin_example
+```
diff --git a/examples/snowflake_plugin/snowflake_plugin.md b/examples/snowflake_plugin/snowflake_plugin.md
index 34f622387..e4313fb11 100644
--- a/examples/snowflake_plugin/snowflake_plugin.md
+++ b/examples/snowflake_plugin/snowflake_plugin.md
@@ -20,7 +20,7 @@ pip install flytekitplugins-snowflake
If you intend to run the plugin on the Flyte cluster, you must first set it up on the backend.
Please refer to the
-{std:ref}`Snowflake plugin setup guide `
+{ref}`Snowflake plugin setup guide `
for detailed instructions.

## Run the example on the Flyte cluster
diff --git a/examples/snowflake_plugin/snowflake_plugin/snowflake_plugin_example.py b/examples/snowflake_plugin/snowflake_plugin/snowflake_plugin_example.py
index 7ee3b2858..a9903a900 100644
--- a/examples/snowflake_plugin/snowflake_plugin/snowflake_plugin_example.py
+++ b/examples/snowflake_plugin/snowflake_plugin/snowflake_plugin_example.py
@@ -1,3 +1,8 @@
+# %% [markdown]
+# # Snowflake plugin example
+#
+# %%
+
from flytekit import kwtypes, workflow
from flytekitplugins.snowflake import SnowflakeConfig, SnowflakeTask
diff --git a/examples/wandb_plugin/Dockerfile b/examples/wandb_plugin/Dockerfile
index 4e04f77bd..65da827a3 100644
--- a/examples/wandb_plugin/Dockerfile
+++ b/examples/wandb_plugin/Dockerfile
@@ -1,5 +1,5 @@
FROM python:3.11-slim-bookworm
-LABEL org.opencontainers.image.source https://github.com/flyteorg/flytesnacks
+LABEL org.opencontainers.image.source=https://github.com/flyteorg/flytesnacks

WORKDIR /root
ENV VENV /opt/venv
diff --git a/examples/wandb_plugin/README.md b/examples/wandb_plugin/README.md
index 53c3ddfd4..b5f1da584 100644
--- a/examples/wandb_plugin/README.md
+++ b/examples/wandb_plugin/README.md
@@ -1,4 +1,4 @@
-(wandb)=
+(wandb_plugin)=

# Weights and Biases
diff --git a/examples/whylogs_plugin/requirements.in b/examples/whylogs_plugin/requirements.in
index 0091e81e3..3ecac63ec 100644
--- a/examples/whylogs_plugin/requirements.in
+++ b/examples/whylogs_plugin/requirements.in
@@ -4,6 +4,7 @@ matplotlib
flytekitplugins-deck-standard
flytekitplugins-whylogs>=1.1.1b0
scikit-learn
-whylogs[s3]==1.3.30
+whylogs[s3]
whylogs[mlflow]
whylogs[whylabs]
+pandas