From 0cfa681d140a6ac6385c13913bfc39f0ba7d0c24 Mon Sep 17 00:00:00 2001 From: Yee Hing Tong Date: Wed, 17 Aug 2022 15:34:07 -0700 Subject: [PATCH] Use joblib hashing to generate cache key to ensure repeatability (#1126) * cherry pick 97b454b1 Signed-off-by: Yee Hing Tong * requirements Signed-off-by: Yee Hing Tong * Fix usage of save in ProtoJoblibHasher Signed-off-by: Eduardo Apolinario * Regenerate requirements using python 3.7 Signed-off-by: Eduardo Apolinario * Add test_stable_cache_key Signed-off-by: Eduardo Apolinario Signed-off-by: Yee Hing Tong Signed-off-by: Eduardo Apolinario Co-authored-by: Eduardo Apolinario --- dev-requirements.txt | 58 +++++----- doc-requirements.txt | 101 ++++++++++-------- flytekit/core/local_cache.py | 21 +++- requirements-spark2.txt | 18 ++-- requirements.txt | 18 ++-- setup.py | 1 + .../workflows/requirements.txt | 27 ++--- tests/flytekit/unit/core/test_local_cache.py | 57 +++++++++- 8 files changed, 190 insertions(+), 111 deletions(-) diff --git a/dev-requirements.txt b/dev-requirements.txt index 645ffbeb18..b477f2553b 100644 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -65,7 +65,7 @@ cookiecutter==2.1.1 # via # -c requirements.txt # flytekit -coverage[toml]==6.4.1 +coverage[toml]==6.4.4 # via # -r dev-requirements.in # pytest-cov @@ -96,7 +96,7 @@ diskcache==5.4.0 # via # -c requirements.txt # flytekit -distlib==0.3.4 +distlib==0.3.5 # via virtualenv distro==1.7.0 # via docker-compose @@ -106,9 +106,7 @@ docker[ssh]==5.0.3 # docker-compose # flytekit docker-compose==1.29.2 - # via - # pytest-docker - # pytest-flyte + # via pytest-flyte docker-image-py==0.1.12 # via # -c requirements.txt @@ -121,9 +119,9 @@ docstring-parser==0.14.1 # via # -c requirements.txt # flytekit -filelock==3.7.1 +filelock==3.8.0 # via virtualenv -flyteidl==1.1.8 +flyteidl==1.1.12 # via # -c requirements.txt # flytekit @@ -132,23 +130,23 @@ google-api-core[grpc]==2.8.2 # google-cloud-bigquery # google-cloud-bigquery-storage # google-cloud-core -google-auth==2.9.0 +google-auth==2.10.0 # via # google-api-core # google-cloud-core -google-cloud-bigquery==3.2.0 +google-cloud-bigquery==3.3.1 # via -r dev-requirements.in -google-cloud-bigquery-storage==2.14.0 +google-cloud-bigquery-storage==2.14.2 # via # -r dev-requirements.in # google-cloud-bigquery -google-cloud-core==2.3.1 +google-cloud-core==2.3.2 # via google-cloud-bigquery google-crc32c==1.3.0 # via google-resumable-media google-resumable-media==2.3.3 # via google-cloud-bigquery -googleapis-common-protos==1.56.3 +googleapis-common-protos==1.56.4 # via # -c requirements.txt # flyteidl @@ -166,7 +164,7 @@ grpcio-status==1.47.0 # -c requirements.txt # flytekit # google-api-core -identify==2.5.1 +identify==2.5.3 # via pre-commit idna==3.3 # via @@ -205,12 +203,15 @@ jinja2-time==0.2.0 # -c requirements.txt # cookiecutter joblib==1.1.0 - # via -r dev-requirements.in + # via + # -c requirements.txt + # -r dev-requirements.in + # flytekit jsonschema==3.2.0 # via # -c requirements.txt # docker-compose -keyring==23.6.0 +keyring==23.8.2 # via # -c requirements.txt # flytekit @@ -232,11 +233,11 @@ marshmallow-jsonschema==0.13.0 # via # -c requirements.txt # flytekit -matplotlib-inline==0.1.3 +matplotlib-inline==0.1.5 # via ipython mock==4.0.3 # via -r dev-requirements.in -mypy==0.961 +mypy==0.971 # via -r dev-requirements.in mypy-extensions==0.4.3 # via @@ -281,7 +282,7 @@ pre-commit==2.20.0 # via -r dev-requirements.in prompt-toolkit==3.0.30 # via ipython -proto-plus==1.20.6 +proto-plus==1.22.0 # via # google-cloud-bigquery # google-cloud-bigquery-storage @@ -323,7 +324,7 @@ pycparser==2.21 # via # -c requirements.txt # cffi -pygments==2.12.0 +pygments==2.13.0 # via ipython pynacl==1.5.0 # via paramiko @@ -347,7 +348,7 @@ pytest==7.1.2 # pytest-flyte pytest-cov==3.0.0 # via -r dev-requirements.in -pytest-docker==0.12.0 +pytest-docker==1.0.0 # via pytest-flyte pytest-flyte @ git+https://github.com/flyteorg/pytest-flyte@main # via -r dev-requirements.in @@ -373,7 +374,7 @@ pytimeparse==1.1.8 # via # -c requirements.txt # flytekit -pytz==2022.1 +pytz==2022.2.1 # via # -c requirements.txt # flytekit @@ -385,7 +386,7 @@ pyyaml==5.4.1 # docker-compose # flytekit # pre-commit -regex==2022.7.9 +regex==2022.7.25 # via # -c requirements.txt # docker-image-py @@ -407,9 +408,9 @@ retry==0.9.2 # via # -c requirements.txt # flytekit -rsa==4.8 +rsa==4.9 # via google-auth -secretstorage==3.3.2 +secretstorage==3.3.3 # via # -c requirements.txt # keyring @@ -426,7 +427,6 @@ six==1.16.0 # jsonschema # paramiko # python-dateutil - # virtualenv # websocket-client sortedcontainers==2.4.0 # via @@ -449,7 +449,7 @@ tomli==2.0.1 # coverage # mypy # pytest -torch==1.12.0 +torch==1.12.1 # via -r dev-requirements.in traitlets==5.3.0 # via @@ -471,13 +471,13 @@ typing-inspect==0.7.1 # via # -c requirements.txt # dataclasses-json -urllib3==1.26.10 +urllib3==1.26.11 # via # -c requirements.txt # flytekit # requests # responses -virtualenv==20.15.1 +virtualenv==20.16.3 # via pre-commit wcwidth==0.2.5 # via prompt-toolkit @@ -495,7 +495,7 @@ wrapt==1.14.1 # -c requirements.txt # deprecated # flytekit -zipp==3.8.0 +zipp==3.8.1 # via # -c requirements.txt # importlib-metadata diff --git a/doc-requirements.txt b/doc-requirements.txt index e9f16c89a0..5bfb8c5d31 100644 --- a/doc-requirements.txt +++ b/doc-requirements.txt @@ -18,9 +18,9 @@ argon2-cffi-bindings==21.2.0 # via argon2-cffi arrow==1.2.2 # via jinja2-time -astroid==2.11.6 +astroid==2.12.2 # via sphinx-autoapi -attrs==21.4.0 +attrs==22.1.0 # via # jsonschema # visions @@ -42,7 +42,7 @@ binaryornot==0.4.4 # via cookiecutter bleach==5.0.1 # via nbconvert -botocore==1.27.22 +botocore==1.27.53 # via -r doc-requirements.in cachetools==5.2.0 # via google-auth @@ -86,7 +86,7 @@ dataclasses-json==0.5.7 # via # dolt-integrations # flytekit -debugpy==1.6.0 +debugpy==1.6.3 # via ipykernel decorator==5.1.1 # via @@ -118,13 +118,13 @@ entrypoints==0.4 # jupyter-client # nbconvert # papermill -fastjsonschema==2.15.3 +fastjsonschema==2.16.1 # via nbformat -flyteidl==1.1.8 +flyteidl==1.1.12 # via flytekit -fonttools==4.33.3 +fonttools==4.35.0 # via matplotlib -fsspec==2022.5.0 +fsspec==2022.7.1 # via # -r doc-requirements.in # modin @@ -135,29 +135,29 @@ google-api-core[grpc]==2.8.2 # google-cloud-bigquery # google-cloud-bigquery-storage # google-cloud-core -google-auth==2.9.0 +google-auth==2.10.0 # via # google-api-core # google-cloud-core # kubernetes google-cloud==0.34.0 # via -r doc-requirements.in -google-cloud-bigquery==3.2.0 +google-cloud-bigquery==3.3.1 # via -r doc-requirements.in -google-cloud-bigquery-storage==2.13.2 +google-cloud-bigquery-storage==2.14.2 # via google-cloud-bigquery -google-cloud-core==2.3.1 +google-cloud-core==2.3.2 # via google-cloud-bigquery google-crc32c==1.3.0 # via google-resumable-media google-resumable-media==2.3.3 # via google-cloud-bigquery -googleapis-common-protos==1.56.3 +googleapis-common-protos==1.56.4 # via # flyteidl # google-api-core # grpcio-status -great-expectations==0.15.12 +great-expectations==0.15.18 # via -r doc-requirements.in greenlet==1.1.2 # via sqlalchemy @@ -190,9 +190,9 @@ importlib-metadata==4.12.0 # markdown # sphinx # sqlalchemy -importlib-resources==5.8.0 +importlib-resources==5.9.0 # via jsonschema -ipykernel==6.15.0 +ipykernel==6.15.1 # via # ipywidgets # jupyter @@ -211,7 +211,9 @@ ipython-genutils==0.2.0 # notebook # qtconsole ipywidgets==7.7.1 - # via jupyter + # via + # great-expectations + # jupyter jedi==0.18.1 # via ipython jeepney==0.8.0 @@ -235,13 +237,14 @@ jmespath==1.0.1 # via botocore joblib==1.1.0 # via + # flytekit # pandas-profiling # phik jsonpatch==1.32 # via great-expectations jsonpointer==2.3 # via jsonpatch -jsonschema==4.6.1 +jsonschema==4.10.0 # via # altair # great-expectations @@ -257,7 +260,7 @@ jupyter-client==7.3.4 # qtconsole jupyter-console==6.4.4 # via jupyter -jupyter-core==4.10.0 +jupyter-core==4.11.1 # via # jupyter-client # nbconvert @@ -268,17 +271,21 @@ jupyterlab-pygments==0.2.2 # via nbconvert jupyterlab-widgets==1.1.1 # via ipywidgets -keyring==23.6.0 +keyring==23.8.2 # via flytekit -kiwisolver==1.4.3 +kiwisolver==1.4.4 # via matplotlib kubernetes==24.2.0 # via -r doc-requirements.in lazy-object-proxy==1.7.1 # via astroid lxml==4.9.1 - # via sphinx-material -markdown==3.3.7 + # via + # nbconvert + # sphinx-material +makefun==1.14.0 + # via great-expectations +markdown==3.4.1 # via -r doc-requirements.in markupsafe==2.1.1 # via @@ -294,13 +301,13 @@ marshmallow-enum==1.5.1 # via dataclasses-json marshmallow-jsonschema==0.13.0 # via flytekit -matplotlib==3.5.2 +matplotlib==3.5.3 # via # missingno # pandas-profiling # phik # seaborn -matplotlib-inline==0.1.3 +matplotlib-inline==0.1.5 # via # ipykernel # ipython @@ -324,7 +331,7 @@ nbclient==0.6.6 # via # nbconvert # papermill -nbconvert==6.5.0 +nbconvert==6.5.3 # via # jupyter # notebook @@ -398,7 +405,7 @@ pandera==0.9.0 # via -r doc-requirements.in pandocfilters==1.5.0 # via nbconvert -papermill==2.3.4 +papermill==2.4.0 # via -r doc-requirements.in parso==0.8.3 # via jedi @@ -413,7 +420,9 @@ pillow==9.2.0 # imagehash # matplotlib # visions -plotly==5.9.0 +pkgutil-resolve-name==1.3.10 + # via jsonschema +plotly==5.10.0 # via -r doc-requirements.in prometheus-client==0.14.1 # via notebook @@ -421,7 +430,7 @@ prompt-toolkit==3.0.30 # via # ipython # jupyter-console -proto-plus==1.20.6 +proto-plus==1.22.0 # via # google-cloud-bigquery # google-cloud-bigquery-storage @@ -461,11 +470,11 @@ pyasn1-modules==0.2.8 # via google-auth pycparser==2.21 # via cffi -pydantic==1.9.1 +pydantic==1.9.2 # via # pandas-profiling # pandera -pygments==2.12.0 +pygments==2.13.0 # via # furo # ipython @@ -497,7 +506,7 @@ python-dateutil==2.8.2 # kubernetes # matplotlib # pandas -python-json-logger==2.0.2 +python-json-logger==2.0.4 # via flytekit python-slugify[unidecode]==6.1.2 # via @@ -505,7 +514,7 @@ python-slugify[unidecode]==6.1.2 # sphinx-material pytimeparse==1.1.8 # via flytekit -pytz==2022.1 +pytz==2022.2.1 # via # babel # flytekit @@ -523,7 +532,7 @@ pyyaml==6.0 # pandas-profiling # papermill # sphinx-autoapi -pyzmq==23.2.0 +pyzmq==23.2.1 # via # ipykernel # jupyter-client @@ -531,9 +540,9 @@ pyzmq==23.2.0 # qtconsole qtconsole==5.3.1 # via jupyter -qtpy==2.1.0 +qtpy==2.2.0 # via qtconsole -regex==2022.6.2 +regex==2022.7.25 # via docker-image-py requests==2.28.1 # via @@ -555,7 +564,7 @@ responses==0.21.0 # via flytekit retry==0.9.2 # via flytekit -rsa==4.8 +rsa==4.9 # via google-auth ruamel-yaml==0.17.17 # via great-expectations @@ -573,7 +582,7 @@ seaborn==0.11.2 # via # missingno # pandas-profiling -secretstorage==3.3.2 +secretstorage==3.3.3 # via keyring send2trash==1.8.0 # via notebook @@ -608,7 +617,7 @@ sphinx==4.5.0 # sphinx-panels # sphinx-prompt # sphinxcontrib-yt -sphinx-autoapi==1.8.4 +sphinx-autoapi==1.9.0 # via -r doc-requirements.in sphinx-basic-ng==0.0.1a12 # via furo @@ -618,7 +627,7 @@ sphinx-copybutton==0.5.0 # via -r doc-requirements.in sphinx-fontawesome==0.0.6 # via -r doc-requirements.in -sphinx-gallery==0.10.1 +sphinx-gallery==0.11.0 # via -r doc-requirements.in sphinx-material==0.0.35 # via -r doc-requirements.in @@ -640,7 +649,7 @@ sphinxcontrib-serializinghtml==1.1.5 # via sphinx sphinxcontrib-yt==0.2.2 # via -r doc-requirements.in -sqlalchemy==1.4.39 +sqlalchemy==1.4.40 # via -r doc-requirements.in statsd==3.3.0 # via flytekit @@ -662,9 +671,9 @@ textwrap3==0.9.2 # via ansiwrap tinycss2==1.1.1 # via nbconvert -toolz==0.11.2 +toolz==0.12.0 # via altair -torch==1.11.0 +torch==1.12.1 # via -r doc-requirements.in tornado==6.2 # via @@ -711,7 +720,7 @@ typing-inspect==0.7.1 # via # dataclasses-json # pandera -tzdata==2022.1 +tzdata==2022.2 # via pytz-deprecation-shim tzlocal==4.2 # via great-expectations @@ -719,7 +728,7 @@ unidecode==1.3.4 # via # python-slugify # sphinx-autoapi -urllib3==1.26.9 +urllib3==1.26.11 # via # botocore # flytekit @@ -749,7 +758,7 @@ wrapt==1.14.1 # deprecated # flytekit # pandera -zipp==3.8.0 +zipp==3.8.1 # via # importlib-metadata # importlib-resources diff --git a/flytekit/core/local_cache.py b/flytekit/core/local_cache.py index 4afab73955..48b6f9c7da 100644 --- a/flytekit/core/local_cache.py +++ b/flytekit/core/local_cache.py @@ -1,8 +1,8 @@ -import base64 from typing import Optional -import cloudpickle from diskcache import Cache +from google.protobuf.struct_pb2 import Struct +from joblib.hashing import NumpyHasher from flytekit.models.literals import Literal, LiteralCollection, LiteralMap @@ -28,15 +28,26 @@ def _recursive_hash_placement(literal: Literal) -> Literal: return literal +class ProtoJoblibHasher(NumpyHasher): + def save(self, obj): + if isinstance(obj, Struct): + obj = dict( + rewrite_rule="google.protobuf.struct_pb2.Struct", + cls=obj.__class__, + obj=dict(sorted(obj.fields.items())), + ) + NumpyHasher.save(self, obj) + + def _calculate_cache_key(task_name: str, cache_version: str, input_literal_map: LiteralMap) -> str: # Traverse the literals and replace the literal with a new literal that only contains the hash literal_map_overridden = {} for key, literal in input_literal_map.literals.items(): literal_map_overridden[key] = _recursive_hash_placement(literal) - # Pickle the literal map and use base64 encoding to generate a representation of it - b64_encoded = base64.b64encode(cloudpickle.dumps(LiteralMap(literal_map_overridden))) - return f"{task_name}-{cache_version}-{b64_encoded}" + # Generate a hash key of inputs with joblib + hashed_inputs = ProtoJoblibHasher().hash(literal_map_overridden) + return f"{task_name}-{cache_version}-{hashed_inputs}" class LocalTaskCache(object): diff --git a/requirements-spark2.txt b/requirements-spark2.txt index 8398d30d1e..ded88f1bb2 100644 --- a/requirements-spark2.txt +++ b/requirements-spark2.txt @@ -52,9 +52,9 @@ docker-image-py==0.1.12 # via flytekit docstring-parser==0.14.1 # via flytekit -flyteidl==1.1.8 +flyteidl==1.1.12 # via flytekit -googleapis-common-protos==1.56.3 +googleapis-common-protos==1.56.4 # via # flyteidl # grpcio-status @@ -82,9 +82,11 @@ jinja2==3.1.2 # jinja2-time jinja2-time==0.2.0 # via cookiecutter +joblib==1.1.0 + # via flytekit jsonschema==3.2.0 # via -r requirements.in -keyring==23.6.0 +keyring==23.8.2 # via flytekit markupsafe==2.1.1 # via jinja2 @@ -146,7 +148,7 @@ python-slugify==6.1.2 # via cookiecutter pytimeparse==1.1.8 # via flytekit -pytz==2022.1 +pytz==2022.2.1 # via # flytekit # pandas @@ -155,7 +157,7 @@ pyyaml==5.4.1 # -r requirements.in # cookiecutter # flytekit -regex==2022.7.9 +regex==2022.7.25 # via docker-image-py requests==2.28.1 # via @@ -167,7 +169,7 @@ responses==0.21.0 # via flytekit retry==0.9.2 # via flytekit -secretstorage==3.3.2 +secretstorage==3.3.3 # via keyring singledispatchmethod==1.0 # via flytekit @@ -192,7 +194,7 @@ typing-extensions==4.3.0 # typing-inspect typing-inspect==0.7.1 # via dataclasses-json -urllib3==1.26.10 +urllib3==1.26.11 # via # flytekit # requests @@ -207,7 +209,7 @@ wrapt==1.14.1 # via # deprecated # flytekit -zipp==3.8.0 +zipp==3.8.1 # via importlib-metadata # The following packages are considered to be unsafe in a requirements file: diff --git a/requirements.txt b/requirements.txt index 153da8b4d2..17a6487f7c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -50,9 +50,9 @@ docker-image-py==0.1.12 # via flytekit docstring-parser==0.14.1 # via flytekit -flyteidl==1.1.8 +flyteidl==1.1.12 # via flytekit -googleapis-common-protos==1.56.3 +googleapis-common-protos==1.56.4 # via # flyteidl # grpcio-status @@ -80,9 +80,11 @@ jinja2==3.1.2 # jinja2-time jinja2-time==0.2.0 # via cookiecutter +joblib==1.1.0 + # via flytekit jsonschema==3.2.0 # via -r requirements.in -keyring==23.6.0 +keyring==23.8.2 # via flytekit markupsafe==2.1.1 # via jinja2 @@ -144,7 +146,7 @@ python-slugify==6.1.2 # via cookiecutter pytimeparse==1.1.8 # via flytekit -pytz==2022.1 +pytz==2022.2.1 # via # flytekit # pandas @@ -153,7 +155,7 @@ pyyaml==5.4.1 # -r requirements.in # cookiecutter # flytekit -regex==2022.7.9 +regex==2022.7.25 # via docker-image-py requests==2.28.1 # via @@ -165,7 +167,7 @@ responses==0.21.0 # via flytekit retry==0.9.2 # via flytekit -secretstorage==3.3.2 +secretstorage==3.3.3 # via keyring singledispatchmethod==1.0 # via flytekit @@ -190,7 +192,7 @@ typing-extensions==4.3.0 # typing-inspect typing-inspect==0.7.1 # via dataclasses-json -urllib3==1.26.10 +urllib3==1.26.11 # via # flytekit # requests @@ -205,7 +207,7 @@ wrapt==1.14.1 # via # deprecated # flytekit -zipp==3.8.0 +zipp==3.8.1 # via importlib-metadata # The following packages are considered to be unsafe in a requirements file: diff --git a/setup.py b/setup.py index 85af8691b6..56e2dd5624 100644 --- a/setup.py +++ b/setup.py @@ -52,6 +52,7 @@ "grpcio-status>=1.43,!=1.45.0", "importlib-metadata", "pyopenssl", + "joblib", "protobuf>=3.6.1,<4", "python-json-logger>=2.0.0", "pytimeparse>=1.1.8,<2.0.0", diff --git a/tests/flytekit/integration/remote/mock_flyte_repo/workflows/requirements.txt b/tests/flytekit/integration/remote/mock_flyte_repo/workflows/requirements.txt index c93c56435c..0fc659135f 100644 --- a/tests/flytekit/integration/remote/mock_flyte_repo/workflows/requirements.txt +++ b/tests/flytekit/integration/remote/mock_flyte_repo/workflows/requirements.txt @@ -46,13 +46,13 @@ docker-image-py==0.1.12 # via flytekit docstring-parser==0.14.1 # via flytekit -flyteidl==1.1.8 +flyteidl==1.1.12 # via flytekit -flytekit==1.1.0 +flytekit==1.1.1 # via -r tests/flytekit/integration/remote/mock_flyte_repo/workflows/requirements.in -fonttools==4.33.3 +fonttools==4.35.0 # via matplotlib -googleapis-common-protos==1.56.3 +googleapis-common-protos==1.56.4 # via # flyteidl # grpcio-status @@ -81,9 +81,9 @@ jinja2-time==0.2.0 # via cookiecutter joblib==1.1.0 # via -r tests/flytekit/integration/remote/mock_flyte_repo/workflows/requirements.in -keyring==23.6.0 +keyring==23.8.2 # via flytekit -kiwisolver==1.4.3 +kiwisolver==1.4.4 # via matplotlib markupsafe==2.1.1 # via jinja2 @@ -96,7 +96,7 @@ marshmallow-enum==1.5.1 # via dataclasses-json marshmallow-jsonschema==0.13.0 # via flytekit -matplotlib==3.5.2 +matplotlib==3.5.3 # via -r tests/flytekit/integration/remote/mock_flyte_repo/workflows/requirements.in mypy-extensions==0.4.3 # via typing-inspect @@ -142,17 +142,18 @@ pyparsing==3.0.9 # packaging python-dateutil==2.8.2 # via + # arrow # croniter # flytekit # matplotlib # pandas -python-json-logger==2.0.2 +python-json-logger==2.0.4 # via flytekit python-slugify==6.1.2 # via cookiecutter pytimeparse==1.1.8 # via flytekit -pytz==2022.1 +pytz==2022.2.1 # via # flytekit # pandas @@ -160,7 +161,7 @@ pyyaml==6.0 # via # cookiecutter # flytekit -regex==2022.6.2 +regex==2022.7.25 # via docker-image-py requests==2.28.1 # via @@ -172,7 +173,7 @@ responses==0.21.0 # via flytekit retry==0.9.2 # via flytekit -secretstorage==3.3.2 +secretstorage==3.3.3 # via keyring singledispatchmethod==1.0 # via flytekit @@ -196,7 +197,7 @@ typing-extensions==4.3.0 # typing-inspect typing-inspect==0.7.1 # via dataclasses-json -urllib3==1.26.9 +urllib3==1.26.11 # via # flytekit # requests @@ -211,5 +212,5 @@ wrapt==1.14.1 # via # deprecated # flytekit -zipp==3.8.0 +zipp==3.8.1 # via importlib-metadata diff --git a/tests/flytekit/unit/core/test_local_cache.py b/tests/flytekit/unit/core/test_local_cache.py index fe09fac830..674f6176e1 100644 --- a/tests/flytekit/unit/core/test_local_cache.py +++ b/tests/flytekit/unit/core/test_local_cache.py @@ -1,7 +1,7 @@ import datetime import typing from dataclasses import dataclass -from typing import List +from typing import Dict, List import pandas from dataclasses_json import dataclass_json @@ -10,12 +10,16 @@ from flytekit.core.base_sql_task import SQLTask from flytekit.core.base_task import kwtypes +from flytekit.core.context_manager import FlyteContextManager from flytekit.core.dynamic_workflow_task import dynamic from flytekit.core.hash import HashMethod -from flytekit.core.local_cache import LocalTaskCache +from flytekit.core.local_cache import LocalTaskCache, _calculate_cache_key from flytekit.core.task import TaskMetadata, task from flytekit.core.testing import task_mock +from flytekit.core.type_engine import TypeEngine from flytekit.core.workflow import workflow +from flytekit.models.literals import LiteralMap +from flytekit.models.types import LiteralType, SimpleType from flytekit.types.schema import FlyteSchema # Global counter used to validate number of calls to cache @@ -385,3 +389,52 @@ def my_workflow(): # Confirm that we see a cache hit in the case of annotated dataframes. my_workflow() assert n_cached_task_calls == 1 + + +def test_cache_key_repetition(): + pt = Dict + lt = TypeEngine.to_literal_type(pt) + ctx = FlyteContextManager.current_context() + kwargs = { + "a": 0.41083513079747874, + "b": 0.7773927872515183, + "c": 17, + } + keys = set() + for i in range(0, 100): + lit = TypeEngine.to_literal(ctx, kwargs, Dict, lt) + lm = LiteralMap( + literals={ + "d": lit, + } + ) + key = _calculate_cache_key("t1", "007", lm) + keys.add(key) + + assert len(keys) == 1 + + +def test_stable_cache_key(): + """ + The intent of this test is to ensure cache keys are stable across releases and python versions. + """ + pt = Dict + lt = TypeEngine.to_literal_type(pt) + ctx = FlyteContextManager.current_context() + kwargs = { + "a": 42, + "b": "abcd", + "c": 0.12349, + "d": [1, 2, 3], + } + lit = TypeEngine.to_literal(ctx, kwargs, Dict, lt) + lm = LiteralMap( + literals={ + "lit_1": lit, + "lit_2": TypeEngine.to_literal(ctx, 99, int, LiteralType(simple=SimpleType.INTEGER)), + "lit_3": TypeEngine.to_literal(ctx, 3.14, float, LiteralType(simple=SimpleType.FLOAT)), + "lit_4": TypeEngine.to_literal(ctx, True, bool, LiteralType(simple=SimpleType.BOOLEAN)), + } + ) + key = _calculate_cache_key("task_name_1", "31415", lm) + assert key == "task_name_1-31415-a291dc6fe0be387c1cfd67b4c6b78259"